diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go index ab1de2447b76e8..a7e0c2f863abb3 100644 --- a/src/cmd/internal/obj/link.go +++ b/src/cmd/internal/obj/link.go @@ -480,6 +480,19 @@ const ( // instruction word. R_POWER_TLS_LE + // R_POWER_TLS_IE is used to implement the "initial exec" model for tls access. It + // relocates a D-form, DS-form instruction sequence like R_ADDRPOWER_DS. It + // inserts to the offset of GOT slot for the thread-local symbol from the TOC (the + // GOT slot is filled by the dynamic linker with the offset of the thread-local + // symbol from the thread pointer (R13)). + R_POWER_TLS_IE + + // R_POWER_TLS marks an X-form instruction such as "MOVD 0(R13)(R31*1), g" as + // accessing a particular thread-local symbol. It does not affect code generation + // but is used by the system linker when relaxing "initial exec" model code to + // "local exec" model code. + R_POWER_TLS + // R_ADDRPOWER_DS is similar to R_ADDRPOWER above, but assumes the second // instruction is a "DS-form" instruction, which has an immediate field occupying // bits [15:2] of the instruction word. Bits [15:2] of the address of the diff --git a/src/cmd/internal/obj/ppc64/a.out.go b/src/cmd/internal/obj/ppc64/a.out.go index 3fd4685769fe8d..d28e1e895e5a4a 100644 --- a/src/cmd/internal/obj/ppc64/a.out.go +++ b/src/cmd/internal/obj/ppc64/a.out.go @@ -222,6 +222,7 @@ const ( C_GOK C_ADDR C_TLS_LE + C_TLS_IE C_TEXTSIZE C_NCLASS /* must be the last */ diff --git a/src/cmd/internal/obj/ppc64/anames9.go b/src/cmd/internal/obj/ppc64/anames9.go index 62125a4e523484..1b5d564dfecf9f 100644 --- a/src/cmd/internal/obj/ppc64/anames9.go +++ b/src/cmd/internal/obj/ppc64/anames9.go @@ -36,6 +36,7 @@ var cnames9 = []string{ "GOK", "ADDR", "TLS_LE", + "TLS_IE", "TEXTSIZE", "NCLASS", } diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go index c226ed540db125..25b9aef83000c5 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -246,6 +246,7 @@ var optab = []Optab{ {AMOVB, C_ADDR, C_NONE, C_NONE, C_REG, 76, 12, 0}, {AMOVD, C_TLS_LE, C_NONE, C_NONE, C_REG, 79, 4, 0}, + {AMOVD, C_TLS_IE, C_NONE, C_NONE, C_REG, 80, 8, 0}, /* load constant */ {AMOVD, C_SECON, C_NONE, C_NONE, C_REG, 3, 4, REGSB}, @@ -587,7 +588,11 @@ func aclass(ctxt *obj.Link, a *obj.Addr) int { ctxt.Instoffset = a.Offset if a.Sym != nil { // use relocation if a.Sym.Type == obj.STLSBSS { - return C_TLS_LE + if ctxt.Flag_shared != 0 { + return C_TLS_IE + } else { + return C_TLS_LE + } } return C_ADDR } @@ -1652,6 +1657,18 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) { if v != 0 { ctxt.Diag("illegal indexed instruction\n%v", p) } + if ctxt.Flag_shared != 0 && r == REG_R13 { + rel := obj.Addrel(ctxt.Cursym) + rel.Off = int32(ctxt.Pc) + rel.Siz = 4 + // This (and the matching part in the load case + // below) are the only places in the ppc64 toolchain + // that knows the name of the tls variable. Possibly + // we could add some assembly syntax so that the name + // of the variable does not have to be assumed. + rel.Sym = obj.Linklookup(ctxt, "runtime.tls_g", 0) + rel.Type = obj.R_POWER_TLS + } o1 = AOP_RRR(uint32(opstorex(ctxt, int(p.As))), uint32(p.From.Reg), uint32(p.To.Index), uint32(r)) } else { if int32(int16(v)) != v { @@ -1671,6 +1688,13 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) { if v != 0 { ctxt.Diag("illegal indexed instruction\n%v", p) } + if ctxt.Flag_shared != 0 && r == REG_R13 { + rel := obj.Addrel(ctxt.Cursym) + rel.Off = int32(ctxt.Pc) + rel.Siz = 4 + rel.Sym = obj.Linklookup(ctxt, "runtime.tls_g", 0) + rel.Type = obj.R_POWER_TLS + } o1 = AOP_RRR(uint32(oploadx(ctxt, int(p.As))), uint32(p.To.Reg), uint32(p.From.Index), uint32(r)) } else { if int32(int16(v)) != v { @@ -2467,6 +2491,18 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) { rel.Sym = p.From.Sym rel.Type = obj.R_POWER_TLS_LE + case 80: + if p.From.Offset != 0 { + ctxt.Diag("invalid offset against tls var %v", p) + } + o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R2, 0) + o2 = AOP_IRR(uint32(opload(ctxt, AMOVD)), uint32(p.To.Reg), uint32(p.To.Reg), 0) + rel := obj.Addrel(ctxt.Cursym) + rel.Off = int32(ctxt.Pc) + rel.Siz = 8 + rel.Sym = p.From.Sym + rel.Type = obj.R_POWER_TLS_IE + } out[0] = o1 diff --git a/src/cmd/link/internal/ld/elf.go b/src/cmd/link/internal/ld/elf.go index 51552e3c152f92..505cbcdbb1ab3d 100644 --- a/src/cmd/link/internal/ld/elf.go +++ b/src/cmd/link/internal/ld/elf.go @@ -564,23 +564,28 @@ const ( R_PPC_EMB_BIT_FLD = 115 R_PPC_EMB_RELSDA = 116 - R_PPC64_ADDR32 = R_PPC_ADDR32 - R_PPC64_ADDR16_LO = R_PPC_ADDR16_LO - R_PPC64_ADDR16_HA = R_PPC_ADDR16_HA - R_PPC64_REL24 = R_PPC_REL24 - R_PPC64_JMP_SLOT = R_PPC_JMP_SLOT - R_PPC64_TPREL16 = R_PPC_TPREL16 - R_PPC64_ADDR64 = 38 - R_PPC64_TOC16 = 47 - R_PPC64_TOC16_LO = 48 - R_PPC64_TOC16_HI = 49 - R_PPC64_TOC16_HA = 50 - R_PPC64_ADDR16_LO_DS = 57 - R_PPC64_TOC16_DS = 63 - R_PPC64_TOC16_LO_DS = 64 - R_PPC64_REL16_LO = 250 - R_PPC64_REL16_HI = 251 - R_PPC64_REL16_HA = 252 + R_PPC64_ADDR32 = R_PPC_ADDR32 + R_PPC64_ADDR16_LO = R_PPC_ADDR16_LO + R_PPC64_ADDR16_HA = R_PPC_ADDR16_HA + R_PPC64_REL24 = R_PPC_REL24 + R_PPC64_GOT16_HA = R_PPC_GOT16_HA + R_PPC64_JMP_SLOT = R_PPC_JMP_SLOT + R_PPC64_TPREL16 = R_PPC_TPREL16 + R_PPC64_ADDR64 = 38 + R_PPC64_TOC16 = 47 + R_PPC64_TOC16_LO = 48 + R_PPC64_TOC16_HI = 49 + R_PPC64_TOC16_HA = 50 + R_PPC64_ADDR16_LO_DS = 57 + R_PPC64_GOT16_LO_DS = 59 + R_PPC64_TOC16_DS = 63 + R_PPC64_TOC16_LO_DS = 64 + R_PPC64_TLS = 67 + R_PPC64_GOT_TPREL16_LO_DS = 88 + R_PPC64_GOT_TPREL16_HA = 90 + R_PPC64_REL16_LO = 250 + R_PPC64_REL16_HI = 251 + R_PPC64_REL16_HA = 252 R_SPARC_NONE = 0 R_SPARC_8 = 1 diff --git a/src/cmd/link/internal/ppc64/asm.go b/src/cmd/link/internal/ppc64/asm.go index 97efe7d354f06f..706160d0fb0340 100644 --- a/src/cmd/link/internal/ppc64/asm.go +++ b/src/cmd/link/internal/ppc64/asm.go @@ -310,9 +310,18 @@ func elfreloc1(r *ld.Reloc, sectoff int64) int { return -1 } + case obj.R_POWER_TLS: + ld.Thearch.Vput(ld.R_PPC64_TLS | uint64(elfsym)<<32) + case obj.R_POWER_TLS_LE: ld.Thearch.Vput(ld.R_PPC64_TPREL16 | uint64(elfsym)<<32) + case obj.R_POWER_TLS_IE: + ld.Thearch.Vput(ld.R_PPC64_GOT_TPREL16_HA | uint64(elfsym)<<32) + ld.Thearch.Vput(uint64(r.Xadd)) + ld.Thearch.Vput(uint64(sectoff + 4)) + ld.Thearch.Vput(ld.R_PPC64_GOT_TPREL16_LO_DS | uint64(elfsym)<<32) + case obj.R_ADDRPOWER: ld.Thearch.Vput(ld.R_PPC64_ADDR16_HA | uint64(elfsym)<<32) ld.Thearch.Vput(uint64(r.Xadd)) @@ -444,7 +453,7 @@ func archreloc(r *ld.Reloc, s *ld.LSym, val *int64) int { default: return -1 - case obj.R_POWER_TLS_LE: + case obj.R_POWER_TLS, obj.R_POWER_TLS_LE, obj.R_POWER_TLS_IE: r.Done = 0 // check Outer is nil, Type is TLSBSS? r.Xadd = r.Add diff --git a/src/runtime/tls_ppc64x.s b/src/runtime/tls_ppc64x.s index 1b030fd36adec4..c79c97dae3f570 100644 --- a/src/runtime/tls_ppc64x.s +++ b/src/runtime/tls_ppc64x.s @@ -27,8 +27,7 @@ TEXT runtime·save_g(SB),NOSPLIT|NOFRAME,$0-0 CMP R31, $0 BEQ nocgo MOVD runtime·tls_g(SB), R31 - ADD R13, R31 - MOVD g, 0(R31) + MOVD g, 0(R13)(R31*1) nocgo: RET @@ -44,8 +43,7 @@ nocgo: // NOTE: _cgo_topofstack assumes this only clobbers g (R30), and R31. TEXT runtime·load_g(SB),NOSPLIT|NOFRAME,$0-0 MOVD runtime·tls_g(SB), R31 - ADD R13, R31 - MOVD 0(R31), g + MOVD 0(R13)(R31*1), g RET GLOBL runtime·tls_g+0(SB), TLSBSS, $8