Skip to content

Commit

Permalink
cmd/compile: improve rules for PPC64.rules
Browse files Browse the repository at this point in the history
This adds some improvements to the rules for PPC64 to eliminate
unnecessary zero or sign extends, and fix some rules for truncates
which were not always using the correct sign instruction.

This reduces the size of many functions by 1 or 2 instructions and
can improve performance in cases where the execution time depends
on small loops where at least 1 instruction was removed and where that
loop contributes a significant amount of the total execution time.

Included is a testcase for codegen to verify the sign/zero extend
instructions are omitted.

An example of the improvement (strings):
IndexAnyASCII/256:1-16     392ns ± 0%   369ns ± 0%  -5.79%  (p=0.000 n=1+10)
IndexAnyASCII/256:2-16     397ns ± 0%   376ns ± 0%  -5.23%  (p=0.000 n=1+9)
IndexAnyASCII/256:4-16     405ns ± 0%   384ns ± 0%  -5.19%  (p=1.714 n=1+6)
IndexAnyASCII/256:8-16     427ns ± 0%   403ns ± 0%  -5.57%  (p=0.000 n=1+10)
IndexAnyASCII/256:16-16    441ns ± 0%   418ns ± 1%  -5.33%  (p=0.000 n=1+10)
IndexAnyASCII/4096:1-16   5.62µs ± 0%  5.27µs ± 1%  -6.31%  (p=0.000 n=1+10)
IndexAnyASCII/4096:2-16   5.67µs ± 0%  5.29µs ± 0%  -6.67%  (p=0.222 n=1+8)
IndexAnyASCII/4096:4-16   5.66µs ± 0%  5.28µs ± 1%  -6.66%  (p=0.000 n=1+10)
IndexAnyASCII/4096:8-16   5.66µs ± 0%  5.31µs ± 1%  -6.10%  (p=0.000 n=1+10)
IndexAnyASCII/4096:16-16  5.70µs ± 0%  5.33µs ± 1%  -6.43%  (p=0.182 n=1+10)

Change-Id: I739a6132b505936d39001aada5a978ff2a5f0500
Reviewed-on: https://go-review.googlesource.com/129875
Reviewed-by: David Chase <[email protected]>
  • Loading branch information
laboger committed Sep 13, 2018
1 parent 8eb36ae commit 8dbd9af
Showing 3 changed files with 1,440 additions and 69 deletions.
77 changes: 65 additions & 12 deletions src/cmd/compile/internal/ssa/gen/PPC64.rules
Original file line number Diff line number Diff line change
@@ -660,14 +660,51 @@
(MOVWreg y:(AND (MOVDconst [c]) _)) && uint64(c) <= 0x7FFFFFFF -> y

// small and of zero-extend -> either zero-extend or small and
// degenerate-and
(ANDconst [c] y:(MOVBZreg _)) && c&0xFF == 0xFF -> y
(ANDconst [0xFF] y:(MOVBreg _)) -> y
(ANDconst [c] y:(MOVHZreg _)) && c&0xFFFF == 0xFFFF -> y
(ANDconst [c] y:(MOVWZreg _)) && c&0xFFFFFFFF == 0xFFFFFFFF -> y
// normal case
(ANDconst [c] (MOVBZreg x)) -> (ANDconst [c&0xFF] x)
(ANDconst [c] (MOVHZreg x)) -> (ANDconst [c&0xFFFF] x)
(ANDconst [c] (MOVWZreg x)) -> (ANDconst [c&0xFFFFFFFF] x)
(ANDconst [0xFFFF] y:(MOVHreg _)) -> y

(AND (MOVDconst [c]) y:(MOVWZreg _)) && c&0xFFFFFFFF == 0xFFFFFFFF -> y
(AND (MOVDconst [0xFFFFFFFF]) y:(MOVWreg x)) -> (MOVWZreg x)
// normal case
(ANDconst [c] (MOV(B|BZ)reg x)) -> (ANDconst [c&0xFF] x)
(ANDconst [c] (MOV(H|HZ)reg x)) -> (ANDconst [c&0xFFFF] x)
(ANDconst [c] (MOV(W|WZ)reg x)) -> (ANDconst [c&0xFFFFFFFF] x)

// Eliminate unnecessary sign/zero extend following right shift
(MOV(B|H|W)Zreg (SRWconst [c] (MOVBZreg x))) -> (SRWconst [c] (MOVBZreg x))
(MOV(H|W)Zreg (SRWconst [c] (MOVHZreg x))) -> (SRWconst [c] (MOVHZreg x))
(MOVWZreg (SRWconst [c] (MOVWZreg x))) -> (SRWconst [c] (MOVWZreg x))
(MOV(B|H|W)reg (SRAWconst [c] (MOVBreg x))) -> (SRAWconst [c] (MOVBreg x))
(MOV(H|W)reg (SRAWconst [c] (MOVHreg x))) -> (SRAWconst [c] (MOVHreg x))
(MOVWreg (SRAWconst [c] (MOVWreg x))) -> (SRAWconst [c] (MOVWreg x))

(MOVWZreg (SRWconst [c] x)) && sizeof(x.Type) <= 32 -> (SRWconst [c] x)
(MOVHZreg (SRWconst [c] x)) && sizeof(x.Type) <= 16 -> (SRWconst [c] x)
(MOVBZreg (SRWconst [c] x)) && sizeof(x.Type) == 8 -> (SRWconst [c] x)
(MOVWreg (SRAWconst [c] x)) && sizeof(x.Type) <= 32 -> (SRAWconst [c] x)
(MOVHreg (SRAWconst [c] x)) && sizeof(x.Type) <= 16 -> (SRAWconst [c] x)
(MOVBreg (SRAWconst [c] x)) && sizeof(x.Type) == 8 -> (SRAWconst [c] x)

// initial right shift will handle sign/zero extend
(MOVBZreg (SRDconst [c] x)) && c>=56 -> (SRDconst [c] x)
(MOVBreg (SRDconst [c] x)) && c>56 -> (SRDconst [c] x)
(MOVBreg (SRDconst [c] x)) && c==56 -> (SRADconst [c] x)
(MOVBZreg (SRWconst [c] x)) && c>=24 -> (SRWconst [c] x)
(MOVBreg (SRWconst [c] x)) && c>24 -> (SRWconst [c] x)
(MOVBreg (SRWconst [c] x)) && c==24 -> (SRAWconst [c] x)

(MOVHZreg (SRDconst [c] x)) && c>=48 -> (SRDconst [c] x)
(MOVHreg (SRDconst [c] x)) && c>48 -> (SRDconst [c] x)
(MOVHreg (SRDconst [c] x)) && c==48 -> (SRADconst [c] x)
(MOVHZreg (SRWconst [c] x)) && c>=16 -> (SRWconst [c] x)
(MOVHreg (SRWconst [c] x)) && c>16 -> (SRWconst [c] x)
(MOVHreg (SRWconst [c] x)) && c==16 -> (SRAWconst [c] x)

(MOVWZreg (SRDconst [c] x)) && c>=32 -> (SRDconst [c] x)
(MOVWreg (SRDconst [c] x)) && c>32 -> (SRDconst [c] x)
(MOVWreg (SRDconst [c] x)) && c==32 -> (SRADconst [c] x)

// Various redundant zero/sign extension combinations.
(MOVBZreg y:(MOVBZreg _)) -> y // repeat
@@ -851,22 +888,38 @@
(ZeroExt16to(32|64) x) -> (MOVHZreg x)
(ZeroExt32to64 x) -> (MOVWZreg x)

(Trunc(16|32|64)to8 x) -> (MOVBreg x)
(Trunc(32|64)to16 x) -> (MOVHreg x)
(Trunc64to32 x) -> (MOVWreg x)
(Trunc(16|32|64)to8 x) && isSigned(x.Type) -> (MOVBreg x)
(Trunc(16|32|64)to8 x) -> (MOVBZreg x)
(Trunc(32|64)to16 x) && isSigned(x.Type) -> (MOVHreg x)
(Trunc(32|64)to16 x) -> (MOVHZreg x)
(Trunc64to32 x) && isSigned(x.Type) -> (MOVWreg x)
(Trunc64to32 x) -> (MOVWZreg x)

(Slicemask <t> x) -> (SRADconst (NEG <t> x) [63])

// Note that MOV??reg returns a 64-bit int, x is not necessarily that wide
// This may interact with other patterns in the future. (Compare with arm64)
(MOVBZreg x:(MOVBZload _ _)) -> x
(MOVHZreg x:(MOVHZload _ _)) -> x
(MOVHreg x:(MOVHload _ _)) -> x
(MOV(B|H|W)Zreg x:(MOVBZload _ _)) -> x
(MOV(H|W)Zreg x:(MOVHZload _ _)) -> x
(MOV(H|W)reg x:(MOVHload _ _)) -> x
(MOVWZreg x:(MOVWZload _ _)) -> x
(MOVWreg x:(MOVWload _ _)) -> x

// don't extend if argument is already extended
(MOVBreg x:(Arg <t>)) && is8BitInt(t) && isSigned(t) -> x
(MOVBZreg x:(Arg <t>)) && is8BitInt(t) && !isSigned(t) -> x
(MOVHreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t)) && isSigned(t) -> x
(MOVHZreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t)) && !isSigned(t) -> x
(MOVWreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && isSigned(t) -> x
(MOVWZreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && !isSigned(t) -> x

(MOVBZreg (MOVDconst [c])) -> (MOVDconst [int64(uint8(c))])
(MOVBreg (MOVDconst [c])) -> (MOVDconst [int64(int8(c))])
(MOVHZreg (MOVDconst [c])) -> (MOVDconst [int64(uint16(c))])
(MOVHreg (MOVDconst [c])) -> (MOVDconst [int64(int16(c))])
(MOVWreg (MOVDconst [c])) -> (MOVDconst [int64(int32(c))])
(MOVWZreg (MOVDconst [c])) -> (MOVDconst [int64(uint32(c))])


// Lose widening ops fed to stores
(MOVBstore [off] {sym} ptr (MOV(B|BZ|H|HZ|W|WZ)reg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
Loading
Oops, something went wrong.

0 comments on commit 8dbd9af

Please sign in to comment.