You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Hello,
I ran a full-system simulation of a tuned ARM CPU on a large workload (one that takes 8 hours on a supercomputer) using the code from the gem5-dev branch.
However, with this commit my job is sometimes killed, which I think is due to excessive memory usage. If I run a small workload, it works fine. Could you identify or estimate where memory management could be improved in the implementation of FDP and/or the associative BTB?
I changed the associative BTB to simpleBTB_v2 as shown below:
namespace branch_prediction
{
/**
 * Construct the BTB from its parameter object.
 *
 * Copies the sizing parameters, aborts through fatal() when the number of
 * entries is not a power of two, sizes the entry table and marks every entry
 * invalid, then derives the index mask, tag mask and tag shift amount that
 * getIndex() and getTag() rely on.
 *
 * @param p Parameter object providing numEntries, tagBits, instShiftAmt and
 *          numThreads.
 */
SimpleBTB::SimpleBTB(const SimpleBTBParams &p)
    : BranchTargetBuffer(p),
      numEntries(p.numEntries),
      tagBits(p.tagBits),
      instShiftAmt(p.instShiftAmt),
      log2NumThreads(floorLog2(p.numThreads))
{
    DPRINTF(BTB, "BTB: Creating BTB object.\n");

    if (!isPowerOf2(numEntries)) {
        fatal("BTB entries is not a power of 2!");
    }

    // Allocate the table and start with every slot empty.
    btb.resize(numEntries);
    for (auto &entry : btb) {
        entry.valid = false;
    }

    // numEntries is a power of two here, so subtracting one yields a mask of
    // the low index bits.
    idxMask = numEntries - 1;

    // Build the tag mask in 64-bit unsigned arithmetic. The plain `1 << n`
    // form shifts a signed int and overflows once tagBits reaches 31 or
    // more; shifting a 64-bit value by 64 would be undefined as well, so the
    // full-width case is handled explicitly.
    tagMask = tagBits >= 64 ? ~0ULL : (1ULL << tagBits) - 1;

    // Tag bits start right above the index bits of the shifted PC.
    tagShiftAmt = instShiftAmt + floorLog2(numEntries);
}
/**
 * Invalidate the whole BTB by clearing the valid flag of every entry.
 * The other fields of each entry are left as they are.
 */
void
SimpleBTB::memInvalidate()
{
    // The constructor sizes btb to numEntries, so walking the container
    // visits exactly the slots [0, numEntries).
    for (auto &slot : btb) {
        slot.valid = false;
    }
}
/**
 * Compute the table index for a branch.
 *
 * The PC is shifted right by instShiftAmt, XORed with the thread id shifted
 * left by (tagShiftAmt - instShiftAmt - log2NumThreads), and the result is
 * masked with idxMask.
 *
 * @param instPC Address of the branch instruction.
 * @param tid    Thread the branch belongs to.
 * @return Index into the btb table.
 */
inline
unsigned
SimpleBTB::getIndex(Addr instPC, ThreadID tid)
{
    // Drop the low PC bits covered by instShiftAmt.
    const Addr pc_bits = instPC >> instShiftAmt;

    // Thread id moved up by the same amount the original expression uses;
    // `auto` keeps the promoted type of the shift expression unchanged.
    const auto tid_bits =
        tid << (tagShiftAmt - instShiftAmt - log2NumThreads);

    return (pc_bits ^ tid_bits) & idxMask;
}
/**
 * Extract the tag of a branch address.
 *
 * @param instPC Address of the branch instruction.
 * @return The PC shifted right by tagShiftAmt and masked with tagMask.
 */
inline
Addr
SimpleBTB::getTag(Addr instPC)
{
    const Addr upper_bits = instPC >> tagShiftAmt;
    return upper_bits & tagMask;
}
/**
 * Locate the entry that currently holds a given branch, if any.
 *
 * Only the single slot selected by getIndex() is inspected. It matches when
 * it is valid and both its tag and its thread id agree with the request.
 *
 * @param instPC Address of the branch instruction.
 * @param tid    Thread the branch belongs to.
 * @return Pointer to the matching entry, or nullptr when there is none.
 */
SimpleBTB::BTBEntry *
SimpleBTB::findEntry(Addr instPC, ThreadID tid)
{
    const unsigned slot = getIndex(instPC, tid);
    const Addr wanted_tag = getTag(instPC);

    assert(slot < numEntries);

    BTBEntry &candidate = btb[slot];

    // Any mismatch means a miss; the checks run in the same order as before.
    if (!candidate.valid
        || wanted_tag != candidate.tag
        || candidate.tid != tid) {
        return nullptr;
    }

    return &candidate;
}
/**
 * Report whether the BTB holds an entry for the given branch.
 *
 * @param tid    Thread the branch belongs to.
 * @param instPC Address of the branch instruction.
 * @return true when findEntry() returns an entry, false otherwise.
 */
bool
SimpleBTB::valid(ThreadID tid, Addr instPC)
{
    return findEntry(instPC, tid) != nullptr;
}
// @todo Create some sort of return struct that carries both the hit/miss
// information and the target address. For now a miss is signalled by
// returning nullptr.
/**
 * Look up the predicted target of a branch.
 *
 * Every call increments the lookup counter for the branch type; a miss
 * additionally increments the miss counter for that type.
 *
 * @param tid    Thread the branch belongs to.
 * @param instPC Address of the branch instruction.
 * @param type   Branch type used to select the statistics bucket.
 * @return The stored target on a hit, nullptr on a miss.
 */
const PCStateBase *
SimpleBTB::lookup(ThreadID tid, Addr instPC, BranchType type)
{
    stats.lookups[type]++;

    BTBEntry *hit = findEntry(instPC, tid);
    if (!hit) {
        stats.misses[type]++;
        return nullptr;
    }

    return hit->target.get();
}
/**
 * Look up the static instruction recorded for a branch.
 *
 * @param tid    Thread the branch belongs to.
 * @param instPC Address of the branch instruction.
 * @return The instruction stored in the matching entry, or nullptr when the
 *         BTB has no entry for this branch.
 */
const StaticInstPtr
SimpleBTB::lookupInst(ThreadID tid, Addr instPC)
{
    if (BTBEntry *hit = findEntry(instPC, tid)) {
        return hit->inst;
    }

    return nullptr;
}
/**
 * Install or overwrite the entry for a branch.
 *
 * The slot selected by getIndex() is overwritten unconditionally with the
 * new thread id, tag, target and instruction, and is marked valid. The
 * update counter for the branch type is incremented.
 *
 * @param tid    Thread the branch belongs to.
 * @param instPC Address of the branch instruction.
 * @param target Target PC state to record.
 * @param type   Branch type used to select the statistics bucket.
 * @param inst   Static instruction to record alongside the target.
 */
void
SimpleBTB::update(ThreadID tid, Addr instPC,
                  const PCStateBase &target,
                  BranchType type, StaticInstPtr inst)
{
    const unsigned slot = getIndex(instPC, tid);
    assert(slot < numEntries);

    stats.updates[type]++;

    // Fill the selected slot through one reference instead of re-indexing.
    BTBEntry &entry = btb[slot];
    entry.tid = tid;
    entry.valid = true;
    set(entry.target, target);
    entry.tag = getTag(instPC);
    entry.inst = inst;
}
} // namespace branch_prediction
} // namespace gem5
The text was updated successfully, but these errors were encountered:
Hello,
I ran a full-system simulation of a tuned ARM CPU on a large workload (one that takes 8 hours on a supercomputer) using the code from the gem5-dev branch.
However, with this commit my job is sometimes killed, which I think is due to excessive memory usage. If I run a small workload, it works fine. Could you identify or estimate where memory management could be improved in the implementation of FDP and/or the associative BTB?
I changed the associative BTB to simpleBTB_v2 as shown below:
The text was updated successfully, but these errors were encountered: