cmd/compile: leverage cc ops in more cases on ppc64x

This updates some rules to use ops with CC variations to set the condition code when the result of the operation is zero. This allows the following compare with zero to be removed since the equivalent condition code has already been set. In addition, a previous rule change to use ANDCCconst was modified to allow any constant value, not just 1 in some cases. Improvements in the reflect package benchmarks: DeepEqual/int8-4 23.9ns ± 1% 23.1ns ± 1% -3.57% (p=0.029 n=4+4) DeepEqual/[]int8-4 109ns ± 2% 102ns ± 1% -6.67% (p=0.029 n=4+4) DeepEqual/int16-4 23.8ns ± 1% 22.8ns ± 0% -3.97% (p=0.029 n=4+4) DeepEqual/[]int16-4 108ns ± 1% 102ns ± 0% -6.25% (p=0.029 n=4+4) DeepEqual/int32-4 24.9ns ± 3% 23.6ns ± 0% -5.09% (p=0.029 n=4+4) DeepEqual/[]int32-4 109ns ± 1% 103ns ± 0% -5.64% (p=0.029 n=4+4) DeepEqual/int64-4 25.5ns ± 1% 23.7ns ± 0% -7.03% (p=0.029 n=4+4) DeepEqual/[]int64-4 109ns ± 1% 102ns ± 0% -6.73% (p=0.029 n=4+4) DeepEqual/int-4 23.2ns ± 1% 22.7ns ± 0% -2.05% (p=0.029 n=4+4) DeepEqual/[]int-4 109ns ± 3% 101ns ± 0% -7.18% (p=0.029 n=4+4) DeepEqual/uint8-4 23.9ns ± 1% 23.5ns ± 0% -1.69% (p=0.029 n=4+4) DeepEqual/[]uint8-4 89.1ns ± 0% 85.6ns ± 1% -3.95% (p=0.029 n=4+4) DeepEqual/uint16-4 24.0ns ± 1% 23.8ns ± 0% -0.76% (p=0.343 n=4+4) DeepEqual/[]uint16-4 111ns ± 0% 106ns ± 4% -4.74% (p=0.029 n=4+4) DeepEqual/uint32-4 23.5ns ± 1% 23.0ns ± 0% -2.15% (p=0.029 n=4+4) DeepEqual/[]uint32-4 110ns ± 1% 104ns ± 0% -5.66% (p=0.029 n=4+4) DeepEqual/uint64-4 24.6ns ± 1% 24.3ns ± 0% -1.10% (p=0.143 n=4+4) DeepEqual/[]uint64-4 111ns ± 0% 105ns ± 1% -5.16% (p=0.029 n=4+4) DeepEqual/uint-4 23.6ns ± 0% 23.0ns ± 0% -2.70% (p=0.029 n=4+4) DeepEqual/[]uint-4 109ns ± 0% 103ns ± 1% -5.74% (p=0.029 n=4+4) DeepEqual/uintptr-4 25.1ns ± 1% 24.8ns ± 2% -1.11% (p=0.171 n=4+4) DeepEqual/[]uintptr-4 111ns ± 0% 106ns ± 1% -4.45% (p=0.029 n=4+4) DeepEqual/float32-4 22.5ns ± 0% 22.2ns ± 0% -1.29% (p=0.029 n=4+4) DeepEqual/[]float32-4 105ns ± 0% 101ns ± 1% -3.75% (p=0.029 n=4+4) DeepEqual/float64-4 22.7ns ± 2% 22.1ns ± 0% -2.52% (p=0.029 n=4+4) DeepEqual/[]float64-4 105ns ± 1% 103ns ± 1% -2.77% (p=0.029 n=4+4) DeepEqual/complex64-4 22.9ns ± 0% 22.8ns ± 0% -0.48% (p=0.029 n=4+4) DeepEqual/[]complex64-4 107ns ± 0% 101ns ± 0% -5.48% (p=0.029 n=4+4) DeepEqual/complex128-4 23.2ns ± 1% 22.6ns ± 0% -2.34% (p=0.029 n=4+4) DeepEqual/[]complex128-4 107ns ± 0% 101ns ± 0% -5.60% (p=0.029 n=4+4) DeepEqual/bool-4 22.0ns ± 1% 21.7ns ± 0% -1.44% (p=0.029 n=4+4) DeepEqual/[]bool-4 106ns ± 1% 100ns ± 0% -5.42% (p=0.029 n=4+4) DeepEqual/string-4 26.7ns ± 1% 24.7ns ± 0% -7.47% (p=0.029 n=4+4) DeepEqual/[]string-4 112ns ± 0% 107ns ± 0% -4.21% (p=0.029 n=4+4) DeepEqual/[]uint8#01-4 89.4ns ± 1% 85.5ns ± 1% -4.44% (p=0.029 n=4+4) DeepEqual/[][]uint8-4 177ns ± 0% 173ns ± 1% -2.22% (p=0.029 n=4+4) DeepEqual/[6]uint8-4 137ns ± 1% 137ns ± 0% -0.56% (p=0.057 n=4+4) DeepEqual/[][6]uint8-4 232ns ± 0% 230ns ± 1% -1.09% (p=0.029 n=4+4) Change-Id: I275624e21dc4d70001032be48897f1504cbfdd1c Reviewed-on: https://go-review.googlesource.com/c/go/+/427634 Reviewed-by: Paul Murphy <[email protected]> Reviewed-by: Dmitri Shuralyov <[email protected]> Reviewed-by: Than McIntosh <[email protected]> Reviewed-by: Archana Ravindar <[email protected]> Run-TryBot: Lynn Boger <[email protected]> TryBot-Result: Gopher Robot <[email protected]>
kavu · Oct 7, 2022 · d0b0b10 · d0b0b10
1 parent fdea8e2
commit d0b0b10
Show file tree

Hide file tree

Showing 5 changed files with 408 additions and 109 deletions.
diff --git a/src/cmd/compile/internal/ppc64/ssa.go b/src/cmd/compile/internal/ppc64/ssa.go
@@ -611,7 +611,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 		p.From.Reg = r2
 		p.Reg = r1
 		p.To.Type = obj.TYPE_REG
-		p.To.Reg = ppc64.REGTMP // result is not needed
+		p.To.Reg = v.Reg0()
 
 	case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
 		p := s.Prog(v.Op.Asm())

diff --git a/src/cmd/compile/internal/ssa/_gen/PPC64.rules b/src/cmd/compile/internal/ssa/_gen/PPC64.rules
@@ -404,9 +404,9 @@
 // Elide compares of bit tests
 ((EQ|NE|LT|LE|GT|GE) (CMPconst [0] (Select0 (ANDCCconst [c] x))) yes no) => ((EQ|NE|LT|LE|GT|GE) (Select1 <types.TypeFlags> (ANDCCconst [c] x)) yes no)
 ((EQ|NE|LT|LE|GT|GE) (CMPWconst [0] (Select0 (ANDCCconst [c] x))) yes no) => ((EQ|NE|LT|LE|GT|GE) (Select1 <types.TypeFlags> (ANDCCconst [c] x)) yes no)
-((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (ANDCC x y) yes no)
-((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(OR x y)) yes no) && z.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (ORCC x y) yes no)
-((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(XOR x y)) yes no) && z.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (XORCC x y) yes no)
+((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (Select1 <types.TypeFlags> (ANDCC x y)) yes no)
+((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(OR x y)) yes no) && z.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (Select1 <types.TypeFlags> (ORCC x y)) yes no)
+((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(XOR x y)) yes no) && z.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (Select1 <types.TypeFlags> (XORCC x y)) yes no)
 
 // Only lower after bool is lowered. It should always lower. This helps ensure the folding below happens reliably.
 (CondSelect x y bool) && flagArg(bool) == nil => (ISEL [6] x y (CMPWconst [0] bool))
@@ -907,11 +907,24 @@
 (ISEL [4] x _ (Flag(EQ|GT))) => x
 (ISEL [4] _ y (FlagLT)) => y
 
-(ISEL [2] x y ((CMP|CMPW)const [0] (Select0 (ANDCCconst [1] z)))) => (ISEL [2] x y (Select1 <types.TypeFlags> (ANDCCconst [1] z )))
-(ISEL [6] x y ((CMP|CMPW)const [0] (Select0 (ANDCCconst [1] z)))) => (ISEL [6] x y (Select1 <types.TypeFlags> (ANDCCconst [1] z )))
+(ISEL [2] x y ((CMP|CMPW)const [0] (Select0 (ANDCCconst [n] z)))) => (ISEL [2] x y (Select1 <types.TypeFlags> (ANDCCconst [n] z )))
+(ISEL [6] x y ((CMP|CMPW)const [0] (Select0 (ANDCCconst [n] z)))) => (ISEL [6] x y (Select1 <types.TypeFlags> (ANDCCconst [n] z )))
 (ISELB [2] x ((CMP|CMPW)const [0] (Select0 (ANDCCconst [1] z)))) => (XORconst [1] (Select0 <typ.UInt64> (ANDCCconst [1] z )))
 (ISELB [6] x ((CMP|CMPW)const [0] (Select0 (ANDCCconst [1] z)))) => (Select0 <typ.UInt64> (ANDCCconst [1] z ))
 
+(ISELB [2] x (CMPWconst [0] (Select0 (ANDCCconst [n] z)))) => (ISELB [2] x (Select1 <types.TypeFlags> (ANDCCconst [n] z )))
+(ISELB [6] x (CMPWconst [0] (Select0 (ANDCCconst [n] z)))) => (ISELB [6] x (Select1 <types.TypeFlags> (ANDCCconst [n] z )))
+
+// Only CMPconst for these in case AND|OR|XOR result is > 32 bits
+(ISELB [2] x (CMPconst [0] a:(AND y z))) && a.Uses == 1 => (ISELB [2] x (Select1 <types.TypeFlags> (ANDCC y z )))
+(ISELB [6] x (CMPconst [0] a:(AND y z))) && a.Uses == 1 => (ISELB [6] x (Select1 <types.TypeFlags> (ANDCC y z )))
+
+(ISELB [2] x (CMPconst [0] o:(OR y z))) && o.Uses == 1 => (ISELB [2] x (Select1 <types.TypeFlags> (ORCC y z )))
+(ISELB [6] x (CMPconst [0] o:(OR y z))) && o.Uses == 1 => (ISELB [6] x (Select1 <types.TypeFlags> (ORCC y z )))
+
+(ISELB [2] x (CMPconst [0] a:(XOR y z))) && a.Uses == 1 => (ISELB [2] x (Select1 <types.TypeFlags> (XORCC y z )))
+(ISELB [6] x (CMPconst [0] a:(XOR y z))) && a.Uses == 1 => (ISELB [6] x (Select1 <types.TypeFlags> (XORCC y z )))
+
 (ISELB [n] (MOVDconst [1]) (InvertFlags bool)) && n%4 == 0 => (ISELB [n+1] (MOVDconst [1]) bool)
 (ISELB [n] (MOVDconst [1]) (InvertFlags bool)) && n%4 == 1 => (ISELB [n-1] (MOVDconst [1]) bool)
 (ISELB [n] (MOVDconst [1]) (InvertFlags bool)) && n%4 == 2 => (ISELB [n] (MOVDconst [1]) bool)

diff --git a/src/cmd/compile/internal/ssa/_gen/PPC64Ops.go b/src/cmd/compile/internal/ssa/_gen/PPC64Ops.go
@@ -284,27 +284,27 @@ func init() {
 		{name: "MFVSRD", argLength: 1, reg: fpgp, asm: "MFVSRD", typ: "Int64"},   // move 64 bits of F register into G register
 		{name: "MTVSRD", argLength: 1, reg: gpfp, asm: "MTVSRD", typ: "Float64"}, // move 64 bits of G register into F register
 
-		{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true},                    // arg0&arg1
-		{name: "ANDN", argLength: 2, reg: gp21, asm: "ANDN"},                                     // arg0&^arg1
-		{name: "ANDCC", argLength: 2, reg: gp2cr, asm: "ANDCC", commutative: true, typ: "Flags"}, // arg0&arg1 sets CC
-		{name: "OR", argLength: 2, reg: gp21, asm: "OR", commutative: true},                      // arg0|arg1
-		{name: "ORN", argLength: 2, reg: gp21, asm: "ORN"},                                       // arg0|^arg1
-		{name: "ORCC", argLength: 2, reg: gp2cr, asm: "ORCC", commutative: true, typ: "Flags"},   // arg0|arg1 sets CC
-		{name: "NOR", argLength: 2, reg: gp21, asm: "NOR", commutative: true},                    // ^(arg0|arg1)
-		{name: "XOR", argLength: 2, reg: gp21, asm: "XOR", typ: "Int64", commutative: true},      // arg0^arg1
-		{name: "XORCC", argLength: 2, reg: gp2cr, asm: "XORCC", commutative: true, typ: "Flags"}, // arg0^arg1 sets CC
-		{name: "EQV", argLength: 2, reg: gp21, asm: "EQV", typ: "Int64", commutative: true},      // arg0^^arg1
-		{name: "NEG", argLength: 1, reg: gp11, asm: "NEG"},                                       // -arg0 (integer)
-		{name: "FNEG", argLength: 1, reg: fp11, asm: "FNEG"},                                     // -arg0 (floating point)
-		{name: "FSQRT", argLength: 1, reg: fp11, asm: "FSQRT"},                                   // sqrt(arg0) (floating point)
-		{name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS"},                                 // sqrt(arg0) (floating point, single precision)
-		{name: "FFLOOR", argLength: 1, reg: fp11, asm: "FRIM"},                                   // floor(arg0), float64
-		{name: "FCEIL", argLength: 1, reg: fp11, asm: "FRIP"},                                    // ceil(arg0), float64
-		{name: "FTRUNC", argLength: 1, reg: fp11, asm: "FRIZ"},                                   // trunc(arg0), float64
-		{name: "FROUND", argLength: 1, reg: fp11, asm: "FRIN"},                                   // round(arg0), float64
-		{name: "FABS", argLength: 1, reg: fp11, asm: "FABS"},                                     // abs(arg0), float64
-		{name: "FNABS", argLength: 1, reg: fp11, asm: "FNABS"},                                   // -abs(arg0), float64
-		{name: "FCPSGN", argLength: 2, reg: fp21, asm: "FCPSGN"},                                 // copysign arg0 -> arg1, float64
+		{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true},                                               // arg0&arg1
+		{name: "ANDN", argLength: 2, reg: gp21, asm: "ANDN"},                                                                // arg0&^arg1
+		{name: "ANDCC", argLength: 2, reg: gp21, asm: "ANDCC", commutative: true, clobberFlags: true, typ: "(Int64,Flags)"}, // arg0&arg1 sets CC
+		{name: "OR", argLength: 2, reg: gp21, asm: "OR", commutative: true},                                                 // arg0|arg1
+		{name: "ORN", argLength: 2, reg: gp21, asm: "ORN"},                                                                  // arg0|^arg1
+		{name: "ORCC", argLength: 2, reg: gp21, asm: "ORCC", commutative: true, clobberFlags: true, typ: "(Int,Flags)"},     // arg0|arg1 sets CC
+		{name: "NOR", argLength: 2, reg: gp21, asm: "NOR", commutative: true},                                               // ^(arg0|arg1)
+		{name: "XOR", argLength: 2, reg: gp21, asm: "XOR", typ: "Int64", commutative: true},                                 // arg0^arg1
+		{name: "XORCC", argLength: 2, reg: gp21, asm: "XORCC", commutative: true, clobberFlags: true, typ: "(Int,Flags)"},   // arg0^arg1 sets CC
+		{name: "EQV", argLength: 2, reg: gp21, asm: "EQV", typ: "Int64", commutative: true},                                 // arg0^^arg1
+		{name: "NEG", argLength: 1, reg: gp11, asm: "NEG"},                                                                  // -arg0 (integer)
+		{name: "FNEG", argLength: 1, reg: fp11, asm: "FNEG"},                                                                // -arg0 (floating point)
+		{name: "FSQRT", argLength: 1, reg: fp11, asm: "FSQRT"},                                                              // sqrt(arg0) (floating point)
+		{name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS"},                                                            // sqrt(arg0) (floating point, single precision)
+		{name: "FFLOOR", argLength: 1, reg: fp11, asm: "FRIM"},                                                              // floor(arg0), float64
+		{name: "FCEIL", argLength: 1, reg: fp11, asm: "FRIP"},                                                               // ceil(arg0), float64
+		{name: "FTRUNC", argLength: 1, reg: fp11, asm: "FRIZ"},                                                              // trunc(arg0), float64
+		{name: "FROUND", argLength: 1, reg: fp11, asm: "FRIN"},                                                              // round(arg0), float64
+		{name: "FABS", argLength: 1, reg: fp11, asm: "FABS"},                                                                // abs(arg0), float64
+		{name: "FNABS", argLength: 1, reg: fp11, asm: "FNABS"},                                                              // -abs(arg0), float64
+		{name: "FCPSGN", argLength: 2, reg: fp21, asm: "FCPSGN"},                                                            // copysign arg0 -> arg1, float64
 
 		{name: "ORconst", argLength: 1, reg: gp11, asm: "OR", aux: "Int64"},   // arg0|aux
 		{name: "XORconst", argLength: 1, reg: gp11, asm: "XOR", aux: "Int64"}, // arg0^aux

diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go