Skip to content

Commit

Permalink
drop typed GEP calls (JuliaLang#55708)
Browse files Browse the repository at this point in the history
Now that we use LLVM 18, and almost have LLVM 19 support, do cleanup to
remove LLVM 15/16 type pointer support. LLVM now slightly prefers that
we rewrite our complex GEP to use a simple emit_ptrgep call instead,
which is also much simpler for julia to emit also.
  • Loading branch information
vtjnash authored Sep 10, 2024
1 parent d280792 commit 3653b38
Show file tree
Hide file tree
Showing 12 changed files with 187 additions and 350 deletions.
11 changes: 4 additions & 7 deletions src/ccall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1854,8 +1854,8 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
ctx.builder.SetInsertPoint(checkBB);
auto signal_page_load = ctx.builder.CreateLoad(
ctx.types().T_size,
ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_size,
get_current_signal_page_from_ptls(ctx.builder, ctx.types().T_size, get_current_ptls(ctx), ctx.tbaa().tbaa_const), -1),
emit_ptrgep(ctx, get_current_signal_page_from_ptls(ctx.builder, get_current_ptls(ctx), ctx.tbaa().tbaa_const),
-sizeof(size_t)),
true);
setName(ctx.emission_context, signal_page_load, "signal_page_load");
ctx.builder.CreateBr(contBB);
Expand All @@ -1870,8 +1870,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
auto obj = emit_pointer_from_objref(ctx, boxed(ctx, argv[0])); // T_pprjlvalue
// The inbounds gep makes it more clear to LLVM that the resulting value is not
// a null pointer.
auto strp = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, obj, 1);
setName(ctx.emission_context, strp, "string_ptr");
auto strp = emit_ptrgep(ctx, obj, ctx.types().sizeof_ptr, "string_ptr");
JL_GC_POP();
return mark_or_box_ccall_result(ctx, strp, retboxed, rt, unionall, static_rt);
}
Expand All @@ -1882,9 +1881,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
auto obj = emit_pointer_from_objref(ctx, boxed(ctx, argv[0])); // T_pprjlvalue
// The inbounds gep makes it more clear to LLVM that the resulting value is not
// a null pointer.
auto strp = ctx.builder.CreateConstInBoundsGEP1_32(
ctx.types().T_prjlvalue, obj, (sizeof(jl_sym_t) + sizeof(void*) - 1) / sizeof(void*));
setName(ctx.emission_context, strp, "symbol_name");
auto strp = emit_ptrgep(ctx, obj, sizeof(jl_sym_t), "symbol_name");
JL_GC_POP();
return mark_or_box_ccall_result(ctx, strp, retboxed, rt, unionall, static_rt);
}
Expand Down
126 changes: 36 additions & 90 deletions src/cgutils.cpp

Large diffs are not rendered by default.

123 changes: 53 additions & 70 deletions src/codegen.cpp

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions src/intrinsics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -767,7 +767,7 @@ static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, ArrayRef<jl_cgval_t> argv)
LLT_ALIGN(size, jl_datatype_align(ety))));
setName(ctx.emission_context, im1, "pointerref_offset");
Value *thePtr = emit_unbox(ctx, getPointerTy(ctx.builder.getContext()), e, e.typ);
thePtr = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), thePtr, im1);
thePtr = emit_ptrgep(ctx, thePtr, im1);
setName(ctx.emission_context, thePtr, "pointerref_src");
MDNode *tbaa = best_tbaa(ctx.tbaa(), ety);
emit_memcpy(ctx, strct, jl_aliasinfo_t::fromTBAA(ctx, tbaa), thePtr, jl_aliasinfo_t::fromTBAA(ctx, nullptr), size, Align(sizeof(jl_value_t*)), Align(align_nb));
Expand Down Expand Up @@ -848,7 +848,7 @@ static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, ArrayRef<jl_cgval_t> argv)
im1 = ctx.builder.CreateMul(im1, ConstantInt::get(ctx.types().T_size,
LLT_ALIGN(size, jl_datatype_align(ety))));
setName(ctx.emission_context, im1, "pointerset_offset");
auto gep = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), thePtr, im1);
auto gep = emit_ptrgep(ctx, thePtr, im1);
setName(ctx.emission_context, gep, "pointerset_ptr");
emit_memcpy(ctx, gep, jl_aliasinfo_t::fromTBAA(ctx, nullptr), x, size, Align(align_nb), Align(julia_alignment(ety)));
}
Expand Down
21 changes: 1 addition & 20 deletions src/llvm-alloc-opt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -770,26 +770,7 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref, AllocF
user->replaceUsesOfWith(orig_i, replace);
}
else if (isa<AddrSpaceCastInst>(user) || isa<BitCastInst>(user)) {
#if JL_LLVM_VERSION >= 170000
#ifndef JL_NDEBUG
auto cast_t = PointerType::get(user->getType(), new_i->getType()->getPointerAddressSpace());
Type *new_t = new_i->getType();
assert(cast_t == new_t);
#endif
auto replace_i = new_i;
#else
auto cast_t = PointerType::getWithSamePointeeType(cast<PointerType>(user->getType()), new_i->getType()->getPointerAddressSpace());
auto replace_i = new_i;
Type *new_t = new_i->getType();
if (cast_t != new_t) {
// Shouldn't get here when using opaque pointers, so the new BitCastInst is fine
assert(cast_t->getContext().supportsTypedPointers());
replace_i = new BitCastInst(replace_i, cast_t, "", user);
replace_i->setDebugLoc(user->getDebugLoc());
replace_i->takeName(user);
}
#endif
push_frame(user, replace_i);
push_frame(user, new_i);
}
else if (auto gep = dyn_cast<GetElementPtrInst>(user)) {
SmallVector<Value *, 4> IdxOperands(gep->idx_begin(), gep->idx_end());
Expand Down
45 changes: 19 additions & 26 deletions src/llvm-codegen-shared.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ struct CountTrackedPointers {
CountTrackedPointers(llvm::Type *T, bool ignore_loaded=false);
};

unsigned TrackWithShadow(llvm::Value *Src, llvm::Type *T, bool isptr, llvm::Value *Dst, llvm::Type *DTy, llvm::IRBuilder<> &irbuilder);
unsigned TrackWithShadow(llvm::Value *Src, llvm::Type *T, bool isptr, llvm::Value *Dst, llvm::IRBuilder<> &irbuilder);
llvm::SmallVector<llvm::Value*, 0> ExtractTrackedValues(llvm::Value *Src, llvm::Type *STy, bool isptr, llvm::IRBuilder<> &irbuilder, llvm::ArrayRef<unsigned> perm_offsets={});

static inline void llvm_dump(llvm::Value *v)
Expand Down Expand Up @@ -187,45 +187,39 @@ static inline llvm::Instruction *tbaa_decorate(llvm::MDNode *md, llvm::Instructi
}

// Get PTLS through current task.
static inline llvm::Value *get_current_task_from_pgcstack(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *pgcstack)
static inline llvm::Value *get_current_task_from_pgcstack(llvm::IRBuilder<> &builder, llvm::Value *pgcstack)
{
using namespace llvm;
auto T_pjlvalue = JuliaType::get_pjlvalue_ty(builder.getContext());
auto i8 = builder.getInt8Ty();
const int pgcstack_offset = offsetof(jl_task_t, gcstack);
return builder.CreateInBoundsGEP(
T_pjlvalue, pgcstack,
ConstantInt::get(T_size, -(pgcstack_offset / sizeof(void *))),
"current_task");
return builder.CreateConstInBoundsGEP1_32(i8, pgcstack, -pgcstack_offset, "current_task");
}

// Get PTLS through current task.
static inline llvm::Value *get_current_ptls_from_task(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *current_task, llvm::MDNode *tbaa)
static inline llvm::Value *get_current_ptls_from_task(llvm::IRBuilder<> &builder, llvm::Value *current_task, llvm::MDNode *tbaa)
{
using namespace llvm;
auto T_pjlvalue = JuliaType::get_pjlvalue_ty(builder.getContext());
auto i8 = builder.getInt8Ty();
auto T_ptr = builder.getPtrTy();
const int ptls_offset = offsetof(jl_task_t, ptls);
llvm::Value *pptls = builder.CreateInBoundsGEP(
T_pjlvalue, current_task,
ConstantInt::get(T_size, ptls_offset / sizeof(void *)),
"ptls_field");
LoadInst *ptls_load = builder.CreateAlignedLoad(T_pjlvalue,
pptls, Align(sizeof(void *)), "ptls_load");
llvm::Value *pptls = builder.CreateConstInBoundsGEP1_32(i8, current_task, ptls_offset, "ptls_field");
LoadInst *ptls_load = builder.CreateAlignedLoad(T_ptr, pptls, Align(sizeof(void *)), "ptls_load");
// Note: Corresponding store (`t->ptls = ptls`) happens in `ctx_switch` of tasks.c.
tbaa_decorate(tbaa, ptls_load);
return ptls_load;
}

// Get signal page through current task.
static inline llvm::Value *get_current_signal_page_from_ptls(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::MDNode *tbaa)
static inline llvm::Value *get_current_signal_page_from_ptls(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::MDNode *tbaa)
{
using namespace llvm;
// return builder.CreateCall(prepare_call(reuse_signal_page_func));
auto T_psize = T_size->getPointerTo();
int nthfield = offsetof(jl_tls_states_t, safepoint) / sizeof(void *);
llvm::Value *psafepoint = builder.CreateInBoundsGEP(
T_psize, ptls, ConstantInt::get(T_size, nthfield));
auto T_ptr = builder.getPtrTy();
auto i8 = builder.getInt8Ty();
int nthfield = offsetof(jl_tls_states_t, safepoint);
llvm::Value *psafepoint = builder.CreateConstInBoundsGEP1_32(i8, ptls, nthfield);
LoadInst *ptls_load = builder.CreateAlignedLoad(
T_psize, psafepoint, Align(sizeof(void *)), "safepoint");
T_ptr, psafepoint, Align(sizeof(void *)), "safepoint");
tbaa_decorate(tbaa, ptls_load);
return ptls_load;
}
Expand All @@ -239,7 +233,7 @@ static inline void emit_signal_fence(llvm::IRBuilder<> &builder)
static inline void emit_gc_safepoint(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::MDNode *tbaa, bool final = false)
{
using namespace llvm;
llvm::Value *signal_page = get_current_signal_page_from_ptls(builder, T_size, ptls, tbaa);
llvm::Value *signal_page = get_current_signal_page_from_ptls(builder, ptls, tbaa);
emit_signal_fence(builder);
Module *M = builder.GetInsertBlock()->getModule();
LLVMContext &C = builder.getContext();
Expand All @@ -250,8 +244,7 @@ static inline void emit_gc_safepoint(llvm::IRBuilder<> &builder, llvm::Type *T_s
else {
Function *F = M->getFunction("julia.safepoint");
if (!F) {
auto T_psize = T_size->getPointerTo();
FunctionType *FT = FunctionType::get(Type::getVoidTy(C), {T_psize}, false);
FunctionType *FT = FunctionType::get(Type::getVoidTy(C), {T_size->getPointerTo()}, false);
F = Function::Create(FT, Function::ExternalLinkage, "julia.safepoint", M);
#if JL_LLVM_VERSION >= 160000
F->setMemoryEffects(MemoryEffects::inaccessibleOrArgMemOnly());
Expand All @@ -268,8 +261,8 @@ static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::T
{
using namespace llvm;
Type *T_int8 = state->getType();
Constant *offset = ConstantInt::getSigned(builder.getInt32Ty(), offsetof(jl_tls_states_t, gc_state));
Value *gc_state = builder.CreateInBoundsGEP(T_int8, ptls, ArrayRef<Value*>(offset), "gc_state");
unsigned offset = offsetof(jl_tls_states_t, gc_state);
Value *gc_state = builder.CreateConstInBoundsGEP1_32(T_int8, ptls, offset, "gc_state");
if (old_state == nullptr) {
old_state = builder.CreateLoad(T_int8, gc_state);
cast<LoadInst>(old_state)->setOrdering(AtomicOrdering::Monotonic);
Expand Down
72 changes: 4 additions & 68 deletions src/llvm-late-gc-lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -350,15 +350,7 @@ void LateLowerGCFrame::LiftSelect(State &S, SelectInst *SI) {
ConstantInt::get(Type::getInt32Ty(Cond->getContext()), i),
"", SI);
}
#if JL_LLVM_VERSION >= 170000
assert(FalseElem->getType() == TrueElem->getType());
#else
if (FalseElem->getType() != TrueElem->getType()) {
// Shouldn't get here when using opaque pointers, so the new BitCastInst is fine
assert(FalseElem->getContext().supportsTypedPointers());
FalseElem = new BitCastInst(FalseElem, TrueElem->getType(), "", SI);
}
#endif
SelectInst *SelectBase = SelectInst::Create(Cond, TrueElem, FalseElem, "gclift", SI);
int Number = ++S.MaxPtrNumber;
S.AllPtrNumbering[SelectBase] = Number;
Expand Down Expand Up @@ -427,33 +419,7 @@ void LateLowerGCFrame::LiftPhi(State &S, PHINode *Phi) {
BaseElem = Base;
else
BaseElem = IncomingBases[i];
#if JL_LLVM_VERSION >= 170000
assert(BaseElem->getType() == T_prjlvalue);
#else
if (BaseElem->getType() != T_prjlvalue) {
// Shouldn't get here when using opaque pointers, so the new BitCastInst is fine
assert(BaseElem->getContext().supportsTypedPointers());
auto &remap = CastedRoots[i][BaseElem];
if (!remap) {
if (auto constant = dyn_cast<Constant>(BaseElem)) {
remap = ConstantExpr::getBitCast(constant, T_prjlvalue, "");
} else {
Instruction *InsertBefore;
if (auto arg = dyn_cast<Argument>(BaseElem)) {
InsertBefore = &*arg->getParent()->getEntryBlock().getFirstInsertionPt();
} else {
assert(isa<Instruction>(BaseElem) && "Unknown value type detected!");
InsertBefore = cast<Instruction>(BaseElem)->getNextNonDebugInstruction();
}
while (isa<PHINode>(InsertBefore)) {
InsertBefore = InsertBefore->getNextNonDebugInstruction();
}
remap = new BitCastInst(BaseElem, T_prjlvalue, "", InsertBefore);
}
}
BaseElem = remap;
}
#endif
lift->addIncoming(BaseElem, IncomingBB);
}
}
Expand Down Expand Up @@ -1528,14 +1494,11 @@ SmallVector<Value*, 0> ExtractTrackedValues(Value *Src, Type *STy, bool isptr, I
return Ptrs;
}

unsigned TrackWithShadow(Value *Src, Type *STy, bool isptr, Value *Dst, Type *DTy, IRBuilder<> &irbuilder) {
unsigned TrackWithShadow(Value *Src, Type *STy, bool isptr, Value *Dst, IRBuilder<> &irbuilder) {
auto Ptrs = ExtractTrackedValues(Src, STy, isptr, irbuilder);
for (unsigned i = 0; i < Ptrs.size(); ++i) {
Value *Elem = Ptrs[i];// Dst has type `[n x {}*]*`
Value *Slot = irbuilder.CreateConstInBoundsGEP2_32(DTy, Dst, 0, i);
#if JL_LLVM_VERSION < 170000
assert(cast<PointerType>(Dst->getType())->isOpaqueOrPointeeTypeMatches(DTy));
#endif
Value *Elem = Ptrs[i];
Value *Slot = irbuilder.CreateConstInBoundsGEP1_32(irbuilder.getInt8Ty(), Dst, i * sizeof(void*));
StoreInst *shadowStore = irbuilder.CreateAlignedStore(Elem, Slot, Align(sizeof(void*)));
shadowStore->setOrdering(AtomicOrdering::NotAtomic);
// TODO: shadowStore->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
Expand Down Expand Up @@ -2133,7 +2096,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
// the type tag. (Note that if the size is not a constant, it will call
// gc_alloc_obj, and will redundantly set the tag.)
auto allocBytesIntrinsic = getOrDeclare(jl_intrinsics::GCAllocBytes);
auto ptls = get_current_ptls_from_task(builder, T_size, CI->getArgOperand(0), tbaa_gcframe);
auto ptls = get_current_ptls_from_task(builder, CI->getArgOperand(0), tbaa_gcframe);
auto newI = builder.CreateCall(
allocBytesIntrinsic,
{
Expand Down Expand Up @@ -2319,15 +2282,7 @@ void LateLowerGCFrame::PlaceGCFrameStore(State &S, unsigned R, unsigned MinColor
// Pointee types don't have semantics, so the optimizer is
// free to rewrite them if convenient. We need to change
// it back here for the store.
#if JL_LLVM_VERSION >= 170000
assert(Val->getType() == T_prjlvalue);
#else
if (Val->getType() != T_prjlvalue) {
// Shouldn't get here when using opaque pointers, so the new BitCastInst is fine
assert(Val->getContext().supportsTypedPointers());
Val = new BitCastInst(Val, T_prjlvalue, "", InsertBefore);
}
#endif
new StoreInst(Val, slotAddress, InsertBefore);
}

Expand Down Expand Up @@ -2407,18 +2362,7 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(SmallVectorImpl<int> &Colors, St
for (CallInst *II : ToDelete) {
II->eraseFromParent();
}
#if JL_LLVM_VERSION >= 170000
assert(slotAddress->getType() == AI->getType());
#else
if (slotAddress->getType() != AI->getType()) {
// If we're replacing an ArrayAlloca, the pointer element type may need to be fixed up
// Shouldn't get here when using opaque pointers, so the new BitCastInst is fine
assert(slotAddress->getContext().supportsTypedPointers());
auto BCI = new BitCastInst(slotAddress, AI->getType());
BCI->insertAfter(slotAddress);
slotAddress = BCI;
}
#endif
AI->replaceAllUsesWith(slotAddress);
AI->eraseFromParent();
AI = NULL;
Expand All @@ -2443,15 +2387,7 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(SmallVectorImpl<int> &Colors, St
slotAddress->insertAfter(gcframe);
auto ValExpr = std::make_pair(Base, isa<PointerType>(Base->getType()) ? -1 : i);
auto Elem = MaybeExtractScalar(S, ValExpr, SI);
#if JL_LLVM_VERSION >= 170000
assert(Elem->getType() == T_prjlvalue);
#else
if (Elem->getType() != T_prjlvalue) {
// Shouldn't get here when using opaque pointers, so the new BitCastInst is fine
assert(Elem->getContext().supportsTypedPointers());
Elem = new BitCastInst(Elem, T_prjlvalue, "", SI);
}
#endif
//auto Idxs = ArrayRef<unsigned>(Tracked[i]);
//Value *Elem = ExtractScalar(Base, true, Idxs, SI);
Value *shadowStore = new StoreInst(Elem, slotAddress, SI);
Expand Down
4 changes: 2 additions & 2 deletions src/llvm-ptls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter,
builder.SetInsertPoint(fastTerm->getParent());
fastTerm->removeFromParent();
MDNode *tbaa = tbaa_gcframe;
Value *prior = emit_gc_unsafe_enter(builder, T_size, get_current_ptls_from_task(builder, T_size, get_current_task_from_pgcstack(builder, T_size, pgcstack), tbaa), true);
Value *prior = emit_gc_unsafe_enter(builder, T_size, get_current_ptls_from_task(builder, get_current_task_from_pgcstack(builder, pgcstack), tbaa), true);
builder.Insert(fastTerm);
phi->addIncoming(pgcstack, fastTerm->getParent());
// emit pre-return cleanup
Expand All @@ -203,7 +203,7 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter,
for (auto &BB : *pgcstack->getParent()->getParent()) {
if (isa<ReturnInst>(BB.getTerminator())) {
builder.SetInsertPoint(BB.getTerminator());
emit_gc_unsafe_leave(builder, T_size, get_current_ptls_from_task(builder, T_size, get_current_task_from_pgcstack(builder, T_size, phi), tbaa), last_gc_state, true);
emit_gc_unsafe_leave(builder, T_size, get_current_ptls_from_task(builder, get_current_task_from_pgcstack(builder, phi), tbaa), last_gc_state, true);
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion test/llvmpasses/alloc-opt-gcframe.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; CHECK-NOT: @julia.gc_alloc_obj

; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %gcstack, i64 -12
; OPAQUE: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16
; OPAQUE: [[ptls_field:%.*]] = getelementptr inbounds i8, ptr %current_task,
; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0
; OPAQUE-NEXT: %v = call noalias nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr addrspace(10) @ijl_gc_small_alloc(ptr [[ptls_load]], i32 [[SIZE_T:[0-9]+]], i32 16, i64 {{.*}} @tag {{.*}})
; OPAQUE: store atomic ptr addrspace(10) @tag, ptr addrspace(10) {{.*}} unordered, align 8, !tbaa !4
Expand Down
Loading

0 comments on commit 3653b38

Please sign in to comment.