Bug 1846534 - Track unwind info for tail calls. r=jseward,mstange,profiler-reviewers

Tail calls perform some stack data manipulation, and the profiler (frame iterator) needs to know where the caller's RA and FP are stored while that happens. Each platform now preserves the temporary registers used to hold FP/RA during the collapse-frame operations.

Differential Revision: https://phabricator.services.mozilla.com/D183269
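
The unwind codes appended in the MacroAssembler.cpp hunks below (RestoreFpRa, RestoreFp, UseFpLr, UseFp, Normal) tell the profiler where the caller's FP and return address live while a frame is being collapsed. The consumer of these codes is the wasm frame iterator, which is not among the hunks shown here; the following standalone sketch (with stand-in types, not SpiderMonkey code) only illustrates what each code implies about the RegisterState fields added in ProfilingFrameIterator.h.

// Illustrative sketch only. The stand-in types mirror the unwind codes and
// the RegisterState fields introduced by this commit.
enum class UnwindHow { Normal, RestoreFpRa, RestoreFp, UseFpLr, UseFp };

struct SampledRegisters {  // stand-in for JS::ProfilingFrameIterator::RegisterState
  void* pc;
  void* sp;
  void* fp;
  void* tempRA;  // on ARM this is the value of the LR register
  void* tempFP;
};

struct CallerFrame {
  void* fp;
  void* returnAddress;
};

CallerFrame RecoverCaller(UnwindHow how, const SampledRegisters& r) {
  switch (how) {
    case UnwindHow::RestoreFpRa:
      // Caller FP/RA have just been loaded into the temporary registers.
      return {r.tempFP, r.tempRA};
    case UnwindHow::RestoreFp:
      // The RA has already been placed back on top of the stack; the caller
      // FP is still held in a temporary register.
      return {r.tempFP, *static_cast<void**>(r.sp)};
    case UnwindHow::UseFpLr:
      // ARM64 trampoline: pop(FramePointer, lr) restored both registers.
      return {r.fp, r.tempRA};
    case UnwindHow::UseFp:
      // Non-ARM trampoline: FP is restored and the RA sits on top of the stack.
      return {r.fp, *static_cast<void**>(r.sp)};
    case UnwindHow::Normal:
    default:
      // Ordinary frame conventions apply; unwind through the Frame as usual.
      return {r.fp, nullptr};
  }
}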
yurydelendik committed Sep 26, 2023
1 parent 0e32e6e commit 8667790
Showing 27 changed files with 582 additions and 39 deletions.
23 changes: 21 additions & 2 deletions js/public/ProfilingFrameIterator.h
@@ -96,11 +96,30 @@ class MOZ_NON_PARAM JS_PUBLIC_API ProfilingFrameIterator {

public:
struct RegisterState {
RegisterState() : pc(nullptr), sp(nullptr), fp(nullptr), lr(nullptr) {}
RegisterState()
: pc(nullptr),
sp(nullptr),
fp(nullptr),
unused1(nullptr),
unused2(nullptr) {}
void* pc;
void* sp;
void* fp;
void* lr;
union {
// Value of the LR register on ARM platforms.
void* lr;
// The return address during a tail call operation.
// Note that on ARM this is still the value of the LR register.
void* tempRA;
// Undefined on non-ARM platforms outside of tail call operations.
void* unused1;
};
union {
// The FP reference during a tail call operation.
void* tempFP;
// Undefined outside of tail call operations.
void* unused2;
};
};

ProfilingFrameIterator(
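
For context, a hypothetical sampler-side helper (not part of this commit) showing how the extended RegisterState might be filled in before constructing a ProfilingFrameIterator. The tempFP value comes from whichever register the platform's collapse sequence uses to hold the caller FP (r7 on the ARM simulator, as the js/src/shell/js.cpp hunk further down shows).

#include "js/ProfilingFrameIterator.h"

// Hypothetical helper: package sampled register values, including the
// temporaries used during a tail call, into the extended RegisterState.
static JS::ProfilingFrameIterator::RegisterState MakeRegisterState(
    void* pc, void* sp, void* fp, void* lr, void* tempFP) {
  JS::ProfilingFrameIterator::RegisterState state;
  state.pc = pc;
  state.sp = sp;
  state.fp = fp;
  state.lr = lr;          // on ARM this also provides tempRA (same union member)
  state.tempFP = tempFP;  // register holding the caller FP mid-collapse
  return state;
}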
68 changes: 68 additions & 0 deletions js/src/jit-test/tests/wasm/tail-calls/return-call-profiling.js
@@ -0,0 +1,68 @@
// Tests if the profiler (frame iterator) can unwind in the middle
// of collapse frame instructions.

enableGeckoProfiling();
try {
enableSingleStepProfiling();
} catch (e) {
// continue anyway if single step profiling is not supported
}

var ins = wasmEvalText(`
(module
(func $f (param i64 i64 i64 i64 i64 i64 i64 i64 i64)
local.get 0
i64.eqz
br_if 0
local.get 0
return_call $g
)
(func $g (param i64)
local.get 0
i64.const 1
i64.sub
i64.const 2
i64.const 6
i64.const 3
i64.const 4
i64.const 1
i64.const 2
i64.const 6
i64.const 3
return_call $f
)
(func (export "run") (param i64)
local.get 0
call $g
)
)`);

for (var i = 0; i < 10; i++) {
ins.exports.run(100n);
}

// Also test the case when the trampoline is used.
var ins0 = wasmEvalText(`(module (func (export "t")))`);
var ins = wasmEvalText(`
(module
(import "" "t" (func $g))
(func $f (return_call_indirect $t (i32.const 0)))
(table $t 1 1 funcref)
(func (export "run") (param i64)
loop
local.get 0
i64.eqz
br_if 1
call $f
local.get 0
i64.const 1
i64.sub
local.set 0
br 0
end
)
(elem (i32.const 0) $g)
)`, {"": {t: ins0.exports.t},});

ins.exports.run(10n);
40 changes: 35 additions & 5 deletions js/src/jit/MacroAssembler.cpp
@@ -4748,13 +4748,16 @@ static ReturnCallTrampolineData MakeReturnCallTrampoline(MacroAssembler& masm) {
masm.moveToStackPtr(FramePointer);
# ifdef JS_CODEGEN_ARM64
masm.pop(FramePointer, lr);
masm.append(wasm::CodeRangeUnwindInfo::UseFpLr, masm.currentOffset());
masm.Mov(PseudoStackPointer64, vixl::sp);
masm.abiret();
# else
masm.pop(FramePointer);
masm.append(wasm::CodeRangeUnwindInfo::UseFp, masm.currentOffset());
masm.ret();
# endif

masm.append(wasm::CodeRangeUnwindInfo::Normal, masm.currentOffset());
masm.setFramePushed(savedPushed);
return data;
}
@@ -4829,6 +4832,7 @@ static void CollapseWasmFrameFast(MacroAssembler& masm,
masm.loadPtr(Address(FramePointer, wasm::Frame::callerFPOffset()), tempForFP);
masm.loadPtr(Address(FramePointer, wasm::Frame::returnAddressOffset()),
tempForRA);
masm.append(wasm::CodeRangeUnwindInfo::RestoreFpRa, masm.currentOffset());
bool copyCallerSlot = oldSlotsAndStackArgBytes != newSlotsAndStackArgBytes;
if (copyCallerSlot) {
masm.loadPtr(
@@ -4868,12 +4872,21 @@ static void CollapseWasmFrameFast(MacroAssembler& masm,
masm.storePtr(tempForRA,
Address(FramePointer,
newFrameOffset + wasm::Frame::returnAddressOffset()));
masm.pop(tempForRA);
// Restore tempForRA, but keep the RA on top of the stack.
// There is no non-locking exchange instruction between a register and memory,
// so use tempForCaller as a scratch register.
masm.loadPtr(Address(masm.getStackPointer(), 0), tempForCaller);
masm.storePtr(tempForRA, Address(masm.getStackPointer(), 0));
masm.mov(tempForCaller, tempForRA);
masm.append(wasm::CodeRangeUnwindInfo::RestoreFp, masm.currentOffset());
masm.addToStackPtr(Imm32(framePushedAtStart + newFrameOffset +
wasm::Frame::returnAddressOffset()));
wasm::Frame::returnAddressOffset() + sizeof(void*)));
# endif

masm.movePtr(tempForFP, FramePointer);
// Reset framePushed to its pre-collapse state so that the code which
// follows can set it correctly.
masm.setFramePushed(framePushedAtStart);
}

static void CollapseWasmFrameSlow(MacroAssembler& masm,
@@ -4938,6 +4951,7 @@ static void CollapseWasmFrameSlow(MacroAssembler& masm,
masm.loadPtr(Address(FramePointer, wasm::Frame::callerFPOffset()), tempForFP);
masm.loadPtr(Address(FramePointer, wasm::Frame::returnAddressOffset()),
tempForRA);
masm.append(wasm::CodeRangeUnwindInfo::RestoreFpRa, masm.currentOffset());
masm.loadPtr(
Address(FramePointer, newArgSrc + WasmCallerInstanceOffsetBeforeCall),
tempForCaller);
@@ -4999,15 +5013,24 @@ static void CollapseWasmFrameSlow(MacroAssembler& masm,
masm.storePtr(tempForRA,
Address(FramePointer,
newFrameOffset + wasm::Frame::returnAddressOffset()));
masm.pop(tempForRA);
masm.freeStack(reserved);
// Restore tempForRA, but keep the RA on top of the stack.
// There is no non-locking exchange instruction between a register and memory,
// so use tempForCaller as a scratch register.
masm.loadPtr(Address(masm.getStackPointer(), 0), tempForCaller);
masm.storePtr(tempForRA, Address(masm.getStackPointer(), 0));
masm.mov(tempForCaller, tempForRA);
masm.append(wasm::CodeRangeUnwindInfo::RestoreFp, masm.currentOffset());
masm.addToStackPtr(Imm32(framePushedAtStart + newFrameOffset +
wasm::Frame::returnAddressOffset()));
wasm::Frame::returnAddressOffset() + reserved +
sizeof(void*)));
# endif

// Point FramePointer to hidden frame.
masm.computeEffectiveAddress(Address(FramePointer, newFPOffset),
FramePointer);
// Reset framePushed to its pre-collapse state so that the code which
// follows can set it correctly.
masm.setFramePushed(framePushedAtStart);
}

void MacroAssembler::wasmCollapseFrameFast(
@@ -5028,6 +5051,7 @@ void MacroAssembler::wasmCollapseFrameSlow(
wasmCheckSlowCallsite(temp1, &slow, temp1, temp2);
CollapseWasmFrameFast(*this, retCallInfo);
jump(&done);
append(wasm::CodeRangeUnwindInfo::Normal, currentOffset());

ReturnCallTrampolineData data = MakeReturnCallTrampoline(*this);

@@ -5125,6 +5149,7 @@ CodeOffset MacroAssembler::wasmReturnCallImport(
wasm::CallSiteDesc::ReturnStub);
wasmCollapseFrameSlow(retCallInfo, stubDesc);
jump(ABINonArgReg0);
append(wasm::CodeRangeUnwindInfo::Normal, currentOffset());
return CodeOffset(currentOffset());
}

@@ -5134,6 +5159,7 @@ CodeOffset MacroAssembler::wasmReturnCall(
wasmCollapseFrameFast(retCallInfo);
CodeOffset offset = farJumpWithPatch();
append(desc, offset, funcDefIndex);
append(wasm::CodeRangeUnwindInfo::Normal, currentOffset());
return offset;
}
#endif // ENABLE_WASM_TAIL_CALLS
@@ -5476,6 +5502,7 @@ void MacroAssembler::wasmReturnCallIndirect(
wasmCollapseFrameSlow(retCallInfo, stubDesc);
jump(calleeScratch);
*slowCallOffset = CodeOffset(currentOffset());
append(wasm::CodeRangeUnwindInfo::Normal, currentOffset());

// Fast path: just load the code pointer and go.

@@ -5487,6 +5514,7 @@ void MacroAssembler::wasmReturnCallIndirect(
wasmCollapseFrameFast(retCallInfo);
jump(calleeScratch);
*fastCallOffset = CodeOffset(currentOffset());
append(wasm::CodeRangeUnwindInfo::Normal, currentOffset());
}
#endif // ENABLE_WASM_TAIL_CALLS

@@ -5600,6 +5628,7 @@ void MacroAssembler::wasmReturnCallRef(
wasm::CallSiteDesc::ReturnStub);
wasmCollapseFrameSlow(retCallInfo, stubDesc);
jump(calleeScratch);
append(wasm::CodeRangeUnwindInfo::Normal, currentOffset());

// Fast path: just load WASM_FUNC_UNCHECKED_ENTRY_SLOT value and go.
// The instance and pinned registers are the same as in the caller.
@@ -5610,6 +5639,7 @@ void MacroAssembler::wasmReturnCallRef(

wasmCollapseFrameFast(retCallInfo);
jump(calleeScratch);
append(wasm::CodeRangeUnwindInfo::Normal, currentOffset());
}
#endif

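
The load/store/mov sequences added above replace a plain pop(tempForRA): they place the caller's return address into the top-of-stack slot that held the saved value of tempForRA, and move that saved value back into the register, so the RA stays on the stack where the profiler can find it. A standalone sketch of that exchange on plain values (illustrative only, not masm code):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t topOfStack = 0x1111;    // slot holding the saved value of tempForRA
  uint64_t tempForRA = 0xCA11;     // currently holds the caller's return address
  uint64_t tempForCaller = 0;      // scratch

  tempForCaller = topOfStack;      // loadPtr(Address(sp, 0), tempForCaller)
  topOfStack = tempForRA;          // storePtr(tempForRA, Address(sp, 0))
  tempForRA = tempForCaller;       // mov(tempForCaller, tempForRA)

  assert(topOfStack == 0xCA11);    // caller RA is now on top of the stack
  assert(tempForRA == 0x1111);     // temp register got its saved value back
  return 0;
}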
10 changes: 10 additions & 0 deletions js/src/jit/shared/Assembler-shared.h
@@ -617,6 +617,8 @@ class AssemblerShared {
wasm::TrapSiteVectorArray trapSites_;
wasm::SymbolicAccessVector symbolicAccesses_;
wasm::TryNoteVector tryNotes_;
wasm::CodeRangeUnwindInfoVector codeRangesUnwind_;

#ifdef DEBUG
// To facilitate figuring out which part of SM created each instruction as
// shown by IONFLAGS=codegen, this maintains a stack of (notionally)
@@ -694,11 +696,19 @@ class AssemblerShared {
return true;
}

void append(wasm::CodeRangeUnwindInfo::UnwindHow unwindHow,
uint32_t pcOffset) {
enoughMemory_ &= codeRangesUnwind_.emplaceBack(pcOffset, unwindHow);
}

wasm::CallSiteVector& callSites() { return callSites_; }
wasm::CallSiteTargetVector& callSiteTargets() { return callSiteTargets_; }
wasm::TrapSiteVectorArray& trapSites() { return trapSites_; }
wasm::SymbolicAccessVector& symbolicAccesses() { return symbolicAccesses_; }
wasm::TryNoteVector& tryNotes() { return tryNotes_; }
wasm::CodeRangeUnwindInfoVector& codeRangeUnwindInfos() {
return codeRangesUnwind_;
}
};

// AutoCreatedBy pushes and later pops a who-created-these-insns? tag into the
1 change: 1 addition & 0 deletions js/src/shell/js.cpp
@@ -7238,6 +7238,7 @@ static void SingleStepCallback(void* arg, jit::Simulator* sim, void* pc) {
state.sp = (void*)sim->get_register(jit::Simulator::sp);
state.lr = (void*)sim->get_register(jit::Simulator::lr);
state.fp = (void*)sim->get_register(jit::Simulator::fp);
state.tempFP = (void*)sim->get_register(jit::Simulator::r7);
# elif defined(JS_SIMULATOR_MIPS64) || defined(JS_SIMULATOR_MIPS32)
state.sp = (void*)sim->getRegister(jit::Simulator::sp);
state.lr = (void*)sim->getRegister(jit::Simulator::ra);
9 changes: 7 additions & 2 deletions js/src/wasm/WasmBaselineCompile.cpp
@@ -11531,6 +11531,8 @@ bool js::wasm::BaselineCompileFunctions(const ModuleEnvironment& moduleEnv,
return false;
}

size_t unwindInfoBefore = masm.codeRangeUnwindInfos().length();

// One-pass baseline compilation.

BaseCompiler f(moduleEnv, compilerEnv, func, locals, trapExitLayout,
@@ -11542,8 +11544,11 @@ bool js::wasm::BaselineCompileFunctions(const ModuleEnvironment& moduleEnv,
if (!f.emitFunction()) {
return false;
}
if (!code->codeRanges.emplaceBack(func.index, func.lineOrBytecode,
f.finish())) {
FuncOffsets offsets(f.finish());
bool hasUnwindInfo =
unwindInfoBefore != masm.codeRangeUnwindInfos().length();
if (!code->codeRanges.emplaceBack(func.index, func.lineOrBytecode, offsets,
hasUnwindInfo)) {
return false;
}
}
1 change: 1 addition & 0 deletions js/src/wasm/WasmBuiltins.cpp
@@ -1925,6 +1925,7 @@ bool wasm::EnsureBuiltinThunksInitialized() {
MOZ_ASSERT(masm.callSiteTargets().empty());
MOZ_ASSERT(masm.trapSites().empty());
MOZ_ASSERT(masm.tryNotes().empty());
MOZ_ASSERT(masm.codeRangeUnwindInfos().empty());

if (!ExecutableAllocator::makeExecutableAndFlushICache(thunks->codeBase,
thunks->codeSize)) {
35 changes: 35 additions & 0 deletions js/src/wasm/WasmCode.cpp
@@ -373,6 +373,7 @@ size_t MetadataTier::sizeOfExcludingThis(MallocSizeOf mallocSizeOf) const {
codeRanges.sizeOfExcludingThis(mallocSizeOf) +
callSites.sizeOfExcludingThis(mallocSizeOf) +
tryNotes.sizeOfExcludingThis(mallocSizeOf) +
codeRangeUnwindInfos.sizeOfExcludingThis(mallocSizeOf) +
trapSites.sizeOfExcludingThis(mallocSizeOf) +
stackMaps.sizeOfExcludingThis(mallocSizeOf) +
funcImports.sizeOfExcludingThis(mallocSizeOf) +
@@ -534,6 +535,7 @@ bool LazyStubTier::createManyEntryStubs(const Uint32Vector& funcExportIndices,
MOZ_ASSERT(masm.callSiteTargets().empty());
MOZ_ASSERT(masm.trapSites().empty());
MOZ_ASSERT(masm.tryNotes().empty());
MOZ_ASSERT(masm.codeRangeUnwindInfos().empty());

if (masm.oom()) {
return false;
@@ -1083,6 +1085,39 @@ bool Code::lookupTrap(void* pc, Trap* trapOut, BytecodeOffset* bytecode) const {
return false;
}

struct UnwindInfoPCOffset {
const CodeRangeUnwindInfoVector& info;
explicit UnwindInfoPCOffset(const CodeRangeUnwindInfoVector& info)
: info(info) {}
uint32_t operator[](size_t index) const { return info[index].offset(); }
};

const CodeRangeUnwindInfo* Code::lookupUnwindInfo(void* pc) const {
for (Tier t : tiers()) {
uint32_t target = ((uint8_t*)pc) - segment(t).base();
const CodeRangeUnwindInfoVector& unwindInfoArray =
metadata(t).codeRangeUnwindInfos;
size_t match;
const CodeRangeUnwindInfo* info = nullptr;
if (BinarySearch(UnwindInfoPCOffset(unwindInfoArray), 0,
unwindInfoArray.length(), target, &match)) {
info = &unwindInfoArray[match];
} else {
// No exact match was found; use the insertion point to get the previous
// info entry, and skip if the insertion point falls outside codeRangeUnwindInfos.
if (match == 0) continue;
if (match == unwindInfoArray.length()) {
MOZ_ASSERT(unwindInfoArray[unwindInfoArray.length() - 1].unwindHow() ==
CodeRangeUnwindInfo::Normal);
continue;
}
info = &unwindInfoArray[match - 1];
}
return info->unwindHow() == CodeRangeUnwindInfo::Normal ? nullptr : info;
}
return nullptr;
}

// When enabled, generate profiling labels for every name in funcNames_ that is
// the name of some Function CodeRange. This involves malloc() so do it now
// since, once we start sampling, we'll be in a signal-handing context where we
Expand Down
2 changes: 2 additions & 0 deletions js/src/wasm/WasmCode.h
@@ -485,6 +485,7 @@ struct MetadataTier {
FuncExportVector funcExports;
StackMaps stackMaps;
TryNoteVector tryNotes;
CodeRangeUnwindInfoVector codeRangeUnwindInfos;

// Debug information, not serialized.
uint32_t debugTrapOffset;
@@ -847,6 +848,7 @@ class Code : public ShareableBase<Code> {
const TryNote* lookupTryNote(void* pc, Tier* tier) const;
bool containsCodePC(const void* pc) const;
bool lookupTrap(void* pc, Trap* trap, BytecodeOffset* bytecode) const;
const CodeRangeUnwindInfo* lookupUnwindInfo(void* pc) const;

// To save memory, profilingLabels_ are generated lazily when profiling mode
// is enabled.