Skip to content

Commit

Permalink
cmd/compile: leverage cc ops in more cases on ppc64x
Browse files Browse the repository at this point in the history
This updates some rules to use ops with CC variations to
set the condition code when the result of the operation is
zero. This allows the following compare with zero to be
removed since the equivalent condition code has already been
set.

In addition, a previous rule change to use ANDCCconst was modified
to allow any constant value, not just 1 in some cases.

Improvements in the reflect package benchmarks:

DeepEqual/int8-4                    23.9ns ± 1%    23.1ns ± 1%   -3.57%  (p=0.029 n=4+4)
DeepEqual/[]int8-4                   109ns ± 2%     102ns ± 1%   -6.67%  (p=0.029 n=4+4)
DeepEqual/int16-4                   23.8ns ± 1%    22.8ns ± 0%   -3.97%  (p=0.029 n=4+4)
DeepEqual/[]int16-4                  108ns ± 1%     102ns ± 0%   -6.25%  (p=0.029 n=4+4)
DeepEqual/int32-4                   24.9ns ± 3%    23.6ns ± 0%   -5.09%  (p=0.029 n=4+4)
DeepEqual/[]int32-4                  109ns ± 1%     103ns ± 0%   -5.64%  (p=0.029 n=4+4)
DeepEqual/int64-4                   25.5ns ± 1%    23.7ns ± 0%   -7.03%  (p=0.029 n=4+4)
DeepEqual/[]int64-4                  109ns ± 1%     102ns ± 0%   -6.73%  (p=0.029 n=4+4)
DeepEqual/int-4                     23.2ns ± 1%    22.7ns ± 0%   -2.05%  (p=0.029 n=4+4)
DeepEqual/[]int-4                    109ns ± 3%     101ns ± 0%   -7.18%  (p=0.029 n=4+4)
DeepEqual/uint8-4                   23.9ns ± 1%    23.5ns ± 0%   -1.69%  (p=0.029 n=4+4)
DeepEqual/[]uint8-4                 89.1ns ± 0%    85.6ns ± 1%   -3.95%  (p=0.029 n=4+4)
DeepEqual/uint16-4                  24.0ns ± 1%    23.8ns ± 0%   -0.76%  (p=0.343 n=4+4)
DeepEqual/[]uint16-4                 111ns ± 0%     106ns ± 4%   -4.74%  (p=0.029 n=4+4)
DeepEqual/uint32-4                  23.5ns ± 1%    23.0ns ± 0%   -2.15%  (p=0.029 n=4+4)
DeepEqual/[]uint32-4                 110ns ± 1%     104ns ± 0%   -5.66%  (p=0.029 n=4+4)
DeepEqual/uint64-4                  24.6ns ± 1%    24.3ns ± 0%   -1.10%  (p=0.143 n=4+4)
DeepEqual/[]uint64-4                 111ns ± 0%     105ns ± 1%   -5.16%  (p=0.029 n=4+4)
DeepEqual/uint-4                    23.6ns ± 0%    23.0ns ± 0%   -2.70%  (p=0.029 n=4+4)
DeepEqual/[]uint-4                   109ns ± 0%     103ns ± 1%   -5.74%  (p=0.029 n=4+4)
DeepEqual/uintptr-4                 25.1ns ± 1%    24.8ns ± 2%   -1.11%  (p=0.171 n=4+4)
DeepEqual/[]uintptr-4                111ns ± 0%     106ns ± 1%   -4.45%  (p=0.029 n=4+4)
DeepEqual/float32-4                 22.5ns ± 0%    22.2ns ± 0%   -1.29%  (p=0.029 n=4+4)
DeepEqual/[]float32-4                105ns ± 0%     101ns ± 1%   -3.75%  (p=0.029 n=4+4)
DeepEqual/float64-4                 22.7ns ± 2%    22.1ns ± 0%   -2.52%  (p=0.029 n=4+4)
DeepEqual/[]float64-4                105ns ± 1%     103ns ± 1%   -2.77%  (p=0.029 n=4+4)
DeepEqual/complex64-4               22.9ns ± 0%    22.8ns ± 0%   -0.48%  (p=0.029 n=4+4)
DeepEqual/[]complex64-4              107ns ± 0%     101ns ± 0%   -5.48%  (p=0.029 n=4+4)
DeepEqual/complex128-4              23.2ns ± 1%    22.6ns ± 0%   -2.34%  (p=0.029 n=4+4)
DeepEqual/[]complex128-4             107ns ± 0%     101ns ± 0%   -5.60%  (p=0.029 n=4+4)
DeepEqual/bool-4                    22.0ns ± 1%    21.7ns ± 0%   -1.44%  (p=0.029 n=4+4)
DeepEqual/[]bool-4                   106ns ± 1%     100ns ± 0%   -5.42%  (p=0.029 n=4+4)
DeepEqual/string-4                  26.7ns ± 1%    24.7ns ± 0%   -7.47%  (p=0.029 n=4+4)
DeepEqual/[]string-4                 112ns ± 0%     107ns ± 0%   -4.21%  (p=0.029 n=4+4)
DeepEqual/[]uint8#01-4              89.4ns ± 1%    85.5ns ± 1%   -4.44%  (p=0.029 n=4+4)
DeepEqual/[][]uint8-4                177ns ± 0%     173ns ± 1%   -2.22%  (p=0.029 n=4+4)
DeepEqual/[6]uint8-4                 137ns ± 1%     137ns ± 0%   -0.56%  (p=0.057 n=4+4)
DeepEqual/[][6]uint8-4               232ns ± 0%     230ns ± 1%   -1.09%  (p=0.029 n=4+4)

Change-Id: I275624e21dc4d70001032be48897f1504cbfdd1c
Reviewed-on: https://go-review.googlesource.com/c/go/+/427634
Reviewed-by: Paul Murphy <[email protected]>
Reviewed-by: Dmitri Shuralyov <[email protected]>
Reviewed-by: Than McIntosh <[email protected]>
Reviewed-by: Archana Ravindar <[email protected]>
Run-TryBot: Lynn Boger <[email protected]>
TryBot-Result: Gopher Robot <[email protected]>
  • Loading branch information
laboger committed Oct 7, 2022
1 parent fdea8e2 commit d0b0b10
Show file tree
Hide file tree
Showing 5 changed files with 408 additions and 109 deletions.
2 changes: 1 addition & 1 deletion src/cmd/compile/internal/ppc64/ssa.go
Original file line number Diff line number Diff line change
Expand Up @@ -611,7 +611,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.From.Reg = r2
p.Reg = r1
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REGTMP // result is not needed
p.To.Reg = v.Reg0()

case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
p := s.Prog(v.Op.Asm())
Expand Down
23 changes: 18 additions & 5 deletions src/cmd/compile/internal/ssa/_gen/PPC64.rules
Original file line number Diff line number Diff line change
Expand Up @@ -404,9 +404,9 @@
// Elide compares of bit tests
((EQ|NE|LT|LE|GT|GE) (CMPconst [0] (Select0 (ANDCCconst [c] x))) yes no) => ((EQ|NE|LT|LE|GT|GE) (Select1 <types.TypeFlags> (ANDCCconst [c] x)) yes no)
((EQ|NE|LT|LE|GT|GE) (CMPWconst [0] (Select0 (ANDCCconst [c] x))) yes no) => ((EQ|NE|LT|LE|GT|GE) (Select1 <types.TypeFlags> (ANDCCconst [c] x)) yes no)
((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (ANDCC x y) yes no)
((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(OR x y)) yes no) && z.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (ORCC x y) yes no)
((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(XOR x y)) yes no) && z.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (XORCC x y) yes no)
((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (Select1 <types.TypeFlags> (ANDCC x y)) yes no)
((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(OR x y)) yes no) && z.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (Select1 <types.TypeFlags> (ORCC x y)) yes no)
((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(XOR x y)) yes no) && z.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (Select1 <types.TypeFlags> (XORCC x y)) yes no)

// Only lower after bool is lowered. It should always lower. This helps ensure the folding below happens reliably.
(CondSelect x y bool) && flagArg(bool) == nil => (ISEL [6] x y (CMPWconst [0] bool))
Expand Down Expand Up @@ -907,11 +907,24 @@
(ISEL [4] x _ (Flag(EQ|GT))) => x
(ISEL [4] _ y (FlagLT)) => y

(ISEL [2] x y ((CMP|CMPW)const [0] (Select0 (ANDCCconst [1] z)))) => (ISEL [2] x y (Select1 <types.TypeFlags> (ANDCCconst [1] z )))
(ISEL [6] x y ((CMP|CMPW)const [0] (Select0 (ANDCCconst [1] z)))) => (ISEL [6] x y (Select1 <types.TypeFlags> (ANDCCconst [1] z )))
(ISEL [2] x y ((CMP|CMPW)const [0] (Select0 (ANDCCconst [n] z)))) => (ISEL [2] x y (Select1 <types.TypeFlags> (ANDCCconst [n] z )))
(ISEL [6] x y ((CMP|CMPW)const [0] (Select0 (ANDCCconst [n] z)))) => (ISEL [6] x y (Select1 <types.TypeFlags> (ANDCCconst [n] z )))
(ISELB [2] x ((CMP|CMPW)const [0] (Select0 (ANDCCconst [1] z)))) => (XORconst [1] (Select0 <typ.UInt64> (ANDCCconst [1] z )))
(ISELB [6] x ((CMP|CMPW)const [0] (Select0 (ANDCCconst [1] z)))) => (Select0 <typ.UInt64> (ANDCCconst [1] z ))

(ISELB [2] x (CMPWconst [0] (Select0 (ANDCCconst [n] z)))) => (ISELB [2] x (Select1 <types.TypeFlags> (ANDCCconst [n] z )))
(ISELB [6] x (CMPWconst [0] (Select0 (ANDCCconst [n] z)))) => (ISELB [6] x (Select1 <types.TypeFlags> (ANDCCconst [n] z )))

// Only CMPconst for these in case AND|OR|XOR result is > 32 bits
(ISELB [2] x (CMPconst [0] a:(AND y z))) && a.Uses == 1 => (ISELB [2] x (Select1 <types.TypeFlags> (ANDCC y z )))
(ISELB [6] x (CMPconst [0] a:(AND y z))) && a.Uses == 1 => (ISELB [6] x (Select1 <types.TypeFlags> (ANDCC y z )))

(ISELB [2] x (CMPconst [0] o:(OR y z))) && o.Uses == 1 => (ISELB [2] x (Select1 <types.TypeFlags> (ORCC y z )))
(ISELB [6] x (CMPconst [0] o:(OR y z))) && o.Uses == 1 => (ISELB [6] x (Select1 <types.TypeFlags> (ORCC y z )))

(ISELB [2] x (CMPconst [0] a:(XOR y z))) && a.Uses == 1 => (ISELB [2] x (Select1 <types.TypeFlags> (XORCC y z )))
(ISELB [6] x (CMPconst [0] a:(XOR y z))) && a.Uses == 1 => (ISELB [6] x (Select1 <types.TypeFlags> (XORCC y z )))

(ISELB [n] (MOVDconst [1]) (InvertFlags bool)) && n%4 == 0 => (ISELB [n+1] (MOVDconst [1]) bool)
(ISELB [n] (MOVDconst [1]) (InvertFlags bool)) && n%4 == 1 => (ISELB [n-1] (MOVDconst [1]) bool)
(ISELB [n] (MOVDconst [1]) (InvertFlags bool)) && n%4 == 2 => (ISELB [n] (MOVDconst [1]) bool)
Expand Down
42 changes: 21 additions & 21 deletions src/cmd/compile/internal/ssa/_gen/PPC64Ops.go
Original file line number Diff line number Diff line change
Expand Up @@ -284,27 +284,27 @@ func init() {
{name: "MFVSRD", argLength: 1, reg: fpgp, asm: "MFVSRD", typ: "Int64"}, // move 64 bits of F register into G register
{name: "MTVSRD", argLength: 1, reg: gpfp, asm: "MTVSRD", typ: "Float64"}, // move 64 bits of G register into F register

{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true}, // arg0&arg1
{name: "ANDN", argLength: 2, reg: gp21, asm: "ANDN"}, // arg0&^arg1
{name: "ANDCC", argLength: 2, reg: gp2cr, asm: "ANDCC", commutative: true, typ: "Flags"}, // arg0&arg1 sets CC
{name: "OR", argLength: 2, reg: gp21, asm: "OR", commutative: true}, // arg0|arg1
{name: "ORN", argLength: 2, reg: gp21, asm: "ORN"}, // arg0|^arg1
{name: "ORCC", argLength: 2, reg: gp2cr, asm: "ORCC", commutative: true, typ: "Flags"}, // arg0|arg1 sets CC
{name: "NOR", argLength: 2, reg: gp21, asm: "NOR", commutative: true}, // ^(arg0|arg1)
{name: "XOR", argLength: 2, reg: gp21, asm: "XOR", typ: "Int64", commutative: true}, // arg0^arg1
{name: "XORCC", argLength: 2, reg: gp2cr, asm: "XORCC", commutative: true, typ: "Flags"}, // arg0^arg1 sets CC
{name: "EQV", argLength: 2, reg: gp21, asm: "EQV", typ: "Int64", commutative: true}, // arg0^^arg1
{name: "NEG", argLength: 1, reg: gp11, asm: "NEG"}, // -arg0 (integer)
{name: "FNEG", argLength: 1, reg: fp11, asm: "FNEG"}, // -arg0 (floating point)
{name: "FSQRT", argLength: 1, reg: fp11, asm: "FSQRT"}, // sqrt(arg0) (floating point)
{name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS"}, // sqrt(arg0) (floating point, single precision)
{name: "FFLOOR", argLength: 1, reg: fp11, asm: "FRIM"}, // floor(arg0), float64
{name: "FCEIL", argLength: 1, reg: fp11, asm: "FRIP"}, // ceil(arg0), float64
{name: "FTRUNC", argLength: 1, reg: fp11, asm: "FRIZ"}, // trunc(arg0), float64
{name: "FROUND", argLength: 1, reg: fp11, asm: "FRIN"}, // round(arg0), float64
{name: "FABS", argLength: 1, reg: fp11, asm: "FABS"}, // abs(arg0), float64
{name: "FNABS", argLength: 1, reg: fp11, asm: "FNABS"}, // -abs(arg0), float64
{name: "FCPSGN", argLength: 2, reg: fp21, asm: "FCPSGN"}, // copysign arg0 -> arg1, float64
{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true}, // arg0&arg1
{name: "ANDN", argLength: 2, reg: gp21, asm: "ANDN"}, // arg0&^arg1
{name: "ANDCC", argLength: 2, reg: gp21, asm: "ANDCC", commutative: true, clobberFlags: true, typ: "(Int64,Flags)"}, // arg0&arg1 sets CC
{name: "OR", argLength: 2, reg: gp21, asm: "OR", commutative: true}, // arg0|arg1
{name: "ORN", argLength: 2, reg: gp21, asm: "ORN"}, // arg0|^arg1
{name: "ORCC", argLength: 2, reg: gp21, asm: "ORCC", commutative: true, clobberFlags: true, typ: "(Int,Flags)"}, // arg0|arg1 sets CC
{name: "NOR", argLength: 2, reg: gp21, asm: "NOR", commutative: true}, // ^(arg0|arg1)
{name: "XOR", argLength: 2, reg: gp21, asm: "XOR", typ: "Int64", commutative: true}, // arg0^arg1
{name: "XORCC", argLength: 2, reg: gp21, asm: "XORCC", commutative: true, clobberFlags: true, typ: "(Int,Flags)"}, // arg0^arg1 sets CC
{name: "EQV", argLength: 2, reg: gp21, asm: "EQV", typ: "Int64", commutative: true}, // arg0^^arg1
{name: "NEG", argLength: 1, reg: gp11, asm: "NEG"}, // -arg0 (integer)
{name: "FNEG", argLength: 1, reg: fp11, asm: "FNEG"}, // -arg0 (floating point)
{name: "FSQRT", argLength: 1, reg: fp11, asm: "FSQRT"}, // sqrt(arg0) (floating point)
{name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS"}, // sqrt(arg0) (floating point, single precision)
{name: "FFLOOR", argLength: 1, reg: fp11, asm: "FRIM"}, // floor(arg0), float64
{name: "FCEIL", argLength: 1, reg: fp11, asm: "FRIP"}, // ceil(arg0), float64
{name: "FTRUNC", argLength: 1, reg: fp11, asm: "FRIZ"}, // trunc(arg0), float64
{name: "FROUND", argLength: 1, reg: fp11, asm: "FRIN"}, // round(arg0), float64
{name: "FABS", argLength: 1, reg: fp11, asm: "FABS"}, // abs(arg0), float64
{name: "FNABS", argLength: 1, reg: fp11, asm: "FNABS"}, // -abs(arg0), float64
{name: "FCPSGN", argLength: 2, reg: fp21, asm: "FCPSGN"}, // copysign arg0 -> arg1, float64

{name: "ORconst", argLength: 1, reg: gp11, asm: "OR", aux: "Int64"}, // arg0|aux
{name: "XORconst", argLength: 1, reg: gp11, asm: "XOR", aux: "Int64"}, // arg0^aux
Expand Down
36 changes: 24 additions & 12 deletions src/cmd/compile/internal/ssa/opGen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit d0b0b10

Please sign in to comment.