Skip to content

Commit

Permalink
cmd/asm/internal,cmd/internal/obj/ppc64: add alignment directive to a…
Browse files Browse the repository at this point in the history
…sm for ppc64x

This adds support for an alignment directive that can be used
within Go asm to indicate preferred code alignment for ppc64x.
This is intended to be used with loops to improve
performance.

This change only adds the directive and aligns the code based
on it. Follow up changes will modify asm functions for
ppc64x that benefit from preferred alignment.

Fixes golang#14935

Here is one example of the improvement in memmove when the
directive is used on the loops in the code:

Memmove/64      8.74ns ± 0%    8.64ns ± 0%   -1.19%  (p=0.000 n=8+8)
Memmove/128     11.5ns ± 0%    11.0ns ± 0%   -4.35%  (p=0.000 n=8+8)
Memmove/256     23.0ns ± 0%    15.3ns ± 0%  -33.48%  (p=0.000 n=8+8)
Memmove/512     31.7ns ± 0%    31.8ns ± 0%   +0.32%  (p=0.000 n=8+8)
Memmove/1024    52.3ns ± 0%    43.9ns ± 0%  -16.10%  (p=0.000 n=8+8)
Memmove/2048    93.2ns ± 0%    76.2ns ± 0%  -18.24%  (p=0.000 n=8+8)
Memmove/4096     174ns ± 0%     141ns ± 0%  -18.97%  (p=0.000 n=8+8)

Change-Id: I200d77e923dd5d78c22fe3f8eb142a8fbaff57bf
Reviewed-on: https://go-review.googlesource.com/c/144218
Run-TryBot: Lynn Boger <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: David Chase <[email protected]>
  • Loading branch information
laboger committed Oct 23, 2018
1 parent e41fbc7 commit bdba556
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 11 deletions.
22 changes: 22 additions & 0 deletions src/cmd/asm/internal/asm/asm.go
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,28 @@ func (p *Parser) asmPCData(operands [][]lex.Token) {
p.append(prog, "", true)
}

// asmPCAlign assembles a PCALIGN pseudo-op.
// PCALIGN $16
func (p *Parser) asmPCAlign(operands [][]lex.Token) {
if len(operands) != 1 {
p.errorf("expect one operand for PCALIGN")
return
}

// Operand 0 must be an immediate constant.
key := p.address(operands[0])
if !p.validImmediate("PCALIGN", &key) {
return
}

prog := &obj.Prog{
Ctxt: p.ctxt,
As: obj.APCALIGN,
From: key,
}
p.append(prog, "", true)
}

// asmFuncData assembles a FUNCDATA pseudo-op.
// FUNCDATA $1, funcdata<>+4(SB)
func (p *Parser) asmFuncData(operands [][]lex.Token) {
Expand Down
2 changes: 2 additions & 0 deletions src/cmd/asm/internal/asm/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,8 @@ func (p *Parser) pseudo(word string, operands [][]lex.Token) bool {
p.asmGlobl(operands)
case "PCDATA":
p.asmPCData(operands)
case "PCALIGN":
p.asmPCAlign(operands)
case "TEXT":
p.asmText(operands)
default:
Expand Down
1 change: 1 addition & 0 deletions src/cmd/internal/obj/link.go
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,7 @@ const (
AFUNCDATA
AJMP
ANOP
APCALIGN
APCDATA
ARET
AGETCALLERPC
Expand Down
80 changes: 69 additions & 11 deletions src/cmd/internal/obj/ppc64/asm9.go
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,7 @@ var optab = []Optab{
{obj.ANOP, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0},
{obj.ADUFFZERO, C_NONE, C_NONE, C_NONE, C_LBRA, 11, 4, 0}, // same as ABR/ABL
{obj.ADUFFCOPY, C_NONE, C_NONE, C_NONE, C_LBRA, 11, 4, 0}, // same as ABR/ABL
{obj.APCALIGN, C_LCON, C_NONE, C_NONE, C_NONE, 0, 0, 0}, // align code

{obj.AXXX, C_NONE, C_NONE, C_NONE, C_NONE, 0, 4, 0},
}
Expand All @@ -610,6 +611,28 @@ var oprange [ALAST & obj.AMask][]Optab

var xcmp [C_NCLASS][C_NCLASS]bool

// padding bytes to add to align code as requested
func addpad(pc, a int64, ctxt *obj.Link) int {
switch a {
case 8:
if pc%8 != 0 {
return 4
}
case 16:
switch pc % 16 {
// When currently aligned to 4, avoid 3 NOPs and set to
// 8 byte alignment which should still help.
case 4, 12:
return 4
case 8:
return 8
}
default:
ctxt.Diag("Unexpected alignment: %d for PCALIGN directive\n", a)
}
return 0
}

func span9(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
p := cursym.Func.Text
if p == nil || p.Link == nil { // handle external functions and ELF section symbols
Expand All @@ -632,12 +655,16 @@ func span9(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
o = c.oplook(p)
m = int(o.size)
if m == 0 {
if p.As != obj.ANOP && p.As != obj.AFUNCDATA && p.As != obj.APCDATA {
c.ctxt.Diag("zero-width instruction\n%v", p)
if p.As == obj.APCALIGN {
a := c.vregoff(&p.From)
m = addpad(pc, a, ctxt)
} else {
if p.As != obj.ANOP && p.As != obj.AFUNCDATA && p.As != obj.APCDATA {
ctxt.Diag("zero-width instruction\n%v", p)
}
continue
}
continue
}

pc += int64(m)
}

Expand Down Expand Up @@ -686,10 +713,15 @@ func span9(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {

m = int(o.size)
if m == 0 {
if p.As != obj.ANOP && p.As != obj.AFUNCDATA && p.As != obj.APCDATA {
c.ctxt.Diag("zero-width instruction\n%v", p)
if p.As == obj.APCALIGN {
a := c.vregoff(&p.From)
m = addpad(pc, a, ctxt)
} else {
if p.As != obj.ANOP && p.As != obj.AFUNCDATA && p.As != obj.APCDATA {
ctxt.Diag("zero-width instruction\n%v", p)
}
continue
}
continue
}

pc += int64(m)
Expand All @@ -698,7 +730,10 @@ func span9(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
c.cursym.Size = pc
}

pc += -pc & (funcAlign - 1)
if pc%funcAlign != 0 {
pc += funcAlign - (pc % funcAlign)
}

c.cursym.Size = pc

/*
Expand All @@ -716,10 +751,19 @@ func span9(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
if int(o.size) > 4*len(out) {
log.Fatalf("out array in span9 is too small, need at least %d for %v", o.size/4, p)
}
origsize := o.size
c.asmout(p, o, out[:])
for i = 0; i < int32(o.size/4); i++ {
c.ctxt.Arch.ByteOrder.PutUint32(bp, out[i])
bp = bp[4:]
if origsize == 0 && o.size > 0 {
for i = 0; i < int32(o.size/4); i++ {
c.ctxt.Arch.ByteOrder.PutUint32(bp, out[0])
bp = bp[4:]
}
o.size = origsize
} else {
for i = 0; i < int32(o.size/4); i++ {
c.ctxt.Arch.ByteOrder.PutUint32(bp, out[i])
bp = bp[4:]
}
}
}
}
Expand Down Expand Up @@ -1897,6 +1941,7 @@ func buildop(ctxt *obj.Link) {
obj.ATEXT,
obj.AUNDEF,
obj.AFUNCDATA,
obj.APCALIGN,
obj.APCDATA,
obj.ADUFFZERO,
obj.ADUFFCOPY:
Expand Down Expand Up @@ -2305,6 +2350,19 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
prasm(p)

case 0: /* pseudo ops */
if p.As == obj.APCALIGN {
aln := c.vregoff(&p.From)
v := addpad(p.Pc, aln, c.ctxt)
if v > 0 {
for i := 0; i < 6; i++ {
out[i] = uint32(0)
}
o.size = int8(v)
out[0] = LOP_RRR(OP_OR, REGZERO, REGZERO, REGZERO)
return
}
o.size = 0
}
break

case 1: /* mov r1,r2 ==> OR Rs,Rs,Ra */
Expand Down
1 change: 1 addition & 0 deletions src/cmd/internal/obj/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,7 @@ var Anames = []string{
"FUNCDATA",
"JMP",
"NOP",
"PCALIGN",
"PCDATA",
"RET",
"GETCALLERPC",
Expand Down

0 comments on commit bdba556

Please sign in to comment.