retry logic, correct unblocking; reserve mshr; new bw stats partial
adarshpatil committed Oct 7, 2016
1 parent 1fcbcef commit 6822054
Showing 5 changed files with 81 additions and 49 deletions.
2 changes: 0 additions & 2 deletions gem5/src/mem/DRAMCacheCtrl.py
@@ -24,7 +24,6 @@ class DRAMCacheCtrl(DRAMCtrl):

mshrs = Param.Unsigned(128,"Number of MSHRs (max outstanding requests)")
write_buffers = Param.Unsigned(32,"Number of write buffers")
demand_mshr_reserve = Param.Unsigned(1, "MSHRs reserved for demand access")
tgts_per_mshr = Param.Unsigned(16,"Max number of accesses per MSHR")

num_cores = Param.Unsigned("Number of CPU cores in the system")
@@ -211,7 +210,6 @@ class HMC_2500_x32_Cache(DDR3_1600_x64_Cache):
mem_sched_policy = 'fcfs'

mshrs = 128
demand_mshr_reserve = 1
tgts_per_mshr = 16

# for dramcache this write threshold is (cache writes + cache fills)
61 changes: 40 additions & 21 deletions gem5/src/mem/dram_ctrl.cc
@@ -1249,6 +1249,7 @@ DRAMCtrl::doDRAMAccess(DRAMPacket* dram_pkt)
totQLat += cmd_at - dram_pkt->entryTime;
if(dram_pkt->pkt->req->contextId() == 31) {
gpuQLat += cmd_at - dram_pkt->entryTime;
//bytesReadDRAMGPU += burstSize;
}
else {
cpuQLat += cmd_at - dram_pkt->entryTime;
@@ -1258,6 +1259,8 @@ DRAMCtrl::doDRAMAccess(DRAMPacket* dram_pkt)
if (row_hit)
writeRowHits++;
bytesWritten += burstSize;
//if(dram_pkt->pkt->req->contextId() == 31)
//bytesWrittenGPU += burstSize;
perBankWrBursts[dram_pkt->bankId]++;
}
}
@@ -2106,6 +2109,10 @@ DRAMCtrl::regStats()
.name(name() + ".bytesReadDRAM")
.desc("Total number of bytes read from DRAM");

bytesReadDRAMGPU
.name(name() + ".bytesReadDRAMGPU")
.desc("Total number of bytes read by GPU from DRAM");

bytesReadWrQ
.name(name() + ".bytesReadWrQ")
.desc("Total number of bytes read from write queue");
@@ -2114,6 +2121,10 @@ DRAMCtrl::regStats()
.name(name() + ".bytesWritten")
.desc("Total number of bytes written to DRAM");

bytesWrittenGPU
.name(name() + ".bytesWrittenGPU")
.desc("Total number of bytes written by GPU to DRAM");

bytesReadSys
.name(name() + ".bytesReadSys")
.desc("Total read bytes from the system interface side");
@@ -2136,6 +2147,34 @@

avgWrBW = (bytesWritten / 1000000) / simSeconds;

avgRdBWGPU
.name(name() + ".avgRdBWGPU")
.desc("Average DRAM read bandwidth of GPU in MiByte/s")
.precision(2);

avgRdBWGPU = (bytesReadDRAMGPU / 1000000) / simSeconds;

avgWrBWGPU
.name(name() + ".avgWrBWGPU")
.desc("Average achieved write bandwidth of GPU in MiByte/s")
.precision(2);

avgWrBWGPU = (bytesWrittenGPU / 1000000) / simSeconds;

avgRdBWCPU
.name(name() + ".avgRdBWCPU")
.desc("Average DRAM read bandwidth of CPU in MiByte/s")
.precision(2);

avgRdBWCPU = ((bytesReadDRAM - bytesReadDRAMGPU) / 1000000) / simSeconds;

avgWrBWCPU
.name(name() + ".avgWrBWCPU")
.desc("Average achieved write bandwidth of CPU in MiByte/s")
.precision(2);

avgWrBWCPU = ((bytesWritten - bytesWrittenGPU) / 1000000) / simSeconds;

avgRdBWSys
.name(name() + ".avgRdBWSys")
.desc("Average system read bandwidth in MiByte/s")
@@ -2274,7 +2313,7 @@ DRAMCtrl::drainResume()

DRAMCtrl::MemoryPort::MemoryPort(const std::string& name, DRAMCtrl& _memory)
: QueuedSlavePort(name, &_memory, queue), queue(_memory, *this),
memory(_memory), blocked(false),sendRetryEvent(this)
memory(_memory), blocked(false)
{ }

AddrRangeList
@@ -2319,13 +2358,6 @@ DRAMCtrl::MemoryPort::setBlocked()
assert(!blocked);
DPRINTF(DRAM, "Port is blocking new requests\n");
blocked = true;
// if we already scheduled a retry in this cycle, but it has not yet
// happened, cancel it
if (sendRetryEvent.scheduled()) {
owner.deschedule(sendRetryEvent);
DPRINTF(DRAM, "Port descheduled retry\n");
mustSendRetry = true;
}
}

void
@@ -2334,20 +2366,7 @@ DRAMCtrl::MemoryPort::clearBlocked()
assert(blocked);
DPRINTF(DRAM, "Port is accepting new requests\n");
blocked = false;
if (mustSendRetry) {
// @TODO: need to find a better time (next cycle?)
owner.schedule(sendRetryEvent, curTick() + 1);
}
}

void
DRAMCtrl::MemoryPort::processSendRetry()
{
DPRINTF(DRAM, "Port is sending retry\n");

// reset the flag and call retry
mustSendRetry = false;
sendRetryReq();
}

DRAMCtrl*
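
The formulas added to regStats() above split aggregate DRAM bandwidth by requester: bursts whose request carries contextId 31 are counted as GPU traffic, and the CPU share is obtained by subtraction, so the per-requester averages partition the totals by construction. Below is a minimal standalone sketch of that arithmetic (plain C++ with made-up counter values, not gem5's Stats machinery; the GPU byte counters themselves are still commented out in doDRAMAccess(), which is why the commit message calls the stats "partial"):

#include <cstdint>
#include <cstdio>

int main()
{
    // Example counter values; in the controller these are Stats::Scalar
    // values accumulated per burst in doDRAMAccess().
    uint64_t bytesReadDRAM    = 4000000;   // all read bursts
    uint64_t bytesReadDRAMGPU = 1500000;   // subset issued by contextId 31
    double   simSeconds       = 0.01;      // simulated seconds

    // Same arithmetic as the avgRdBW* formulas: bytes / 1e6 / seconds.
    double avgRdBW    = (bytesReadDRAM / 1000000.0) / simSeconds;
    double avgRdBWGPU = (bytesReadDRAMGPU / 1000000.0) / simSeconds;
    double avgRdBWCPU = ((bytesReadDRAM - bytesReadDRAMGPU) / 1000000.0)
                        / simSeconds;

    // The CPU and GPU shares sum back to the aggregate bandwidth.
    printf("total %.2f = cpu %.2f + gpu %.2f MB/s\n",
           avgRdBW, avgRdBWCPU, avgRdBWGPU);
    return 0;
}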
14 changes: 6 additions & 8 deletions gem5/src/mem/dram_ctrl.hh
@@ -123,14 +123,6 @@ class DRAMCtrl : public AbstractMemory
bool recvTimingReq(PacketPtr);

virtual AddrRangeList getAddrRanges() const;

protected:
void processSendRetry();

EventWrapper<MemoryPort,
&MemoryPort::processSendRetry> sendRetryEvent;


};

/**
@@ -800,8 +792,10 @@ class DRAMCtrl : public AbstractMemory
Stats::Scalar readBursts;
Stats::Scalar writeBursts;
Stats::Scalar bytesReadDRAM;
Stats::Scalar bytesReadDRAMGPU;
Stats::Scalar bytesReadWrQ;
Stats::Scalar bytesWritten;
Stats::Scalar bytesWrittenGPU;
Stats::Scalar bytesReadSys;
Stats::Scalar bytesWrittenSys;
Stats::Scalar servicedByWrQ;
@@ -841,7 +835,11 @@ class DRAMCtrl : public AbstractMemory

// Average bandwidth
Stats::Formula avgRdBW;
Stats::Formula avgRdBWGPU;
Stats::Formula avgRdBWCPU;
Stats::Formula avgWrBW;
Stats::Formula avgWrBWGPU;
Stats::Formula avgWrBWCPU;
Stats::Formula avgRdBWSys;
Stats::Formula avgWrBWSys;
Stats::Formula peakBW;
46 changes: 29 additions & 17 deletions gem5/src/mem/dramcache_ctrl.cc
@@ -23,8 +23,8 @@ int DRAMCacheCtrl::predAccuracy;
DRAMCacheCtrl::DRAMCacheCtrl (const DRAMCacheCtrlParams* p) :
DRAMCtrl (p), respondWriteEvent(this),
dramCache_masterport (name () + ".dramcache_masterport",*this),
mshrQueue ("MSHRs", p->mshrs, 4, p->demand_mshr_reserve, MSHRQueue_MSHRs),
writeBuffer ("write buffer", p->write_buffers, p->mshrs + 1000, 0,
mshrQueue ("MSHRs", p->mshrs, p->read_buffer_size, 0, MSHRQueue_MSHRs),
writeBuffer ("write buffer", p->write_buffers, p->write_buffer_size, 0,
MSHRQueue_WriteBuffer), dramCache_size (p->dramcache_size),
dramCache_assoc (p->dramcache_assoc),
dramCache_block_size (p->dramcache_block_size),
@@ -33,8 +33,8 @@ DRAMCacheCtrl::DRAMCacheCtrl (const DRAMCacheCtrlParams* p) :
replacement_scheme (p->dramcache_replacement_scheme), totalRows (0),
system_cache_block_size (128), // hardcoded to 128
num_sub_blocks_per_way (0), total_gpu_lines (0), total_gpu_dirty_lines (0),
order (0), numTarget (p->tgts_per_mshr), blocked (0), num_cores(p->num_cores),
dramCacheTimingMode(p->dramcache_timing),
order (0), numTarget (p->tgts_per_mshr), blocked (0), cacheMustSendRetry(false),
num_cores(p->num_cores), dramCacheTimingMode(p->dramcache_timing),
fillHighThreshold(p->fill_high_thresh_perc),
fillBufferSize(p->fill_buffer_size),
cacheFillsThisTime(0), cacheWritesThisTime(0)
@@ -412,7 +412,8 @@ DRAMCacheCtrl::processNextReqEvent()
// pre-emptively set to false. Overwrite if in READ_TO_WRITE
// or WRITE_TO_READ state
bool switched_cmd_type = false;
if (busState == READ_TO_WRITE) {
if (busState == READ_TO_WRITE)
{
DPRINTF(DRAMCache, "Switching to writes after %d reads with %d reads "
"waiting\n", readsThisTime, readQueue.size());

@@ -472,7 +473,9 @@ DRAMCacheCtrl::processNextReqEvent()
// event for the next request
return;
}
} else {
}
else
{
// bool to check if there is a read to a free rank
bool found_read = false;

@@ -536,12 +539,14 @@ DRAMCacheCtrl::processNextReqEvent()
// transition to writing
busState = READ_TO_WRITE;
}
} else {
}
else
{

// decide here whether we want to do fills or writes

if (!writeQueue.empty() && (fillQueue.size() < fillHighThreshold
|| writeQueue.size() > min(writeHighThreshold, fillHighThreshold)))
|| writeQueue.size() > min(writeHighThreshold, fillHighThreshold)))
{
// if writeQ is not empty and fillQ is less than the high thresh or
// writeQ size is greater than some high thresh (we use min of
@@ -601,7 +606,7 @@ DRAMCacheCtrl::processNextReqEvent()
(!writeQueue.empty() && fillQueue.size() > fillHighThreshold))
{
// write queue empty => service fills OR
// if fills have gone above the high thresh => service fills
// if fills have gone above the high thresh => service fills

bool found_fill = false;

@@ -635,8 +640,8 @@ DRAMCacheCtrl::processNextReqEvent()
{
// we should never reach here
fatal("busState write; unable determine service fills or writes "
"fillQ size %d writeQ size %d", fillQueue.size(), writeQueue.size());
}
"fillQ size %d writeQ size %d", fillQueue.size(), writeQueue.size());
}

// If we emptied the write or fill queue, or got sufficiently below the
// threshold (using the minWritesPerSwitch as the hysteresis) and
@@ -654,6 +659,7 @@
// also pause any further scheduling if there is really
// nothing to do
}

}

// It is possible that a refresh to another rank kicks things back into
@@ -734,6 +740,7 @@ DRAMCacheCtrl::processWriteRespondEvent()
PacketPtr pkt = new Packet(dram_pkt->pkt, false, true);
memcpy(pkt->getPtr<uint8_t>(), dram_pkt->pkt->getPtr<uint8_t>(),
dramCache_block_size);
pkt->popSenderState();
allocateWriteBuffer(pkt,1);
}
}
@@ -1353,6 +1360,7 @@ DRAMCacheCtrl::recvTimingReq (PacketPtr pkt)
if (blocked != 0)
{
DPRINTF(DRAMCache,"%s cache blocked %d", __func__, pkt->getAddr());
cacheMustSendRetry = true;
return false;
}

@@ -1398,15 +1406,14 @@ DRAMCacheCtrl::recvTimingReq (PacketPtr pkt)
if (mshr->getNumTargets () == numTarget)
{
noTargetMSHR = mshr;
warn("MSHR ran out of %d targets\n", numTarget);
setBlocked (Blocked_NoTargets);
}
return true;
}

// ADARSH: check writeBuffer for an outstanding write back
// service from writeBuffer only if the request is a read (accessAndRespond)
// if the request was a write it should go as a read req to memory
// we assume that memory will coalesce read and write in its queues
mshr = writeBuffer.findMatch (blk_addr, false);
if (mshr && pkt->isRead())
{
@@ -1662,7 +1669,7 @@ DRAMCacheCtrl::recvTimingResp (PacketPtr pkt)
pkt->cmdString(), pkt->getAddr(), pkt->getSize(), mshr->queue->index);

MSHRQueue *mq = mshr->queue;
bool wasFull = mq->isFull ();
bool wasFull = mq->isFull();

if (mshr == noTargetMSHR)
{
@@ -1764,7 +1771,9 @@ DRAMCacheCtrl::recvTimingResp (PacketPtr pkt)

delete pkt;
mq->deallocate (mshr);
if (wasFull)
// check whether we were full and whether deallocating actually created capacity;
// the reserves could have let WriteBuffer/MSHR allocations exceed the nominal limit
if (wasFull && (!mq->isFull()))
clearBlocked ((BlockedCause) mq->index);

}
@@ -1973,6 +1982,7 @@ DRAMCacheCtrl::doDRAMAccess(DRAMPacket* dram_pkt)
totQLat += cmd_at - dram_pkt->entryTime;
if(dram_pkt->pkt->req->contextId() == 31) {
gpuQLat += cmd_at - dram_pkt->entryTime;
//bytesReadDRAMGPU += burstSize;
}
else {
cpuQLat += cmd_at - dram_pkt->entryTime;
@@ -1982,6 +1992,8 @@ DRAMCacheCtrl::doDRAMAccess(DRAMPacket* dram_pkt)
if (row_hit)
writeRowHits++;
bytesWritten += burstSize;
//if (dram_pkt->pkt->req->contextId()==31)
//bytesWrittenGPU += burstSize;
perBankWrBursts[dram_pkt->bankId]++;
}
}
@@ -2335,15 +2347,15 @@ DRAMCacheCtrl::DRAMCacheReqPacketQueue::sendDeferredPacket ()
PacketPtr pkt = cache.getTimingPacket();

if (pkt == NULL)
DPRINTF(DRAMCache, "sendDefferedPacket got no timing Packet");
DPRINTF(DRAMCache, "sendDefferedPacket got no timing Packet\n");
else
{
MSHR *mshr = dynamic_cast<MSHR*> (pkt->senderState);

waitingOnRetry = !masterPort.sendTimingReq (pkt);

if (waitingOnRetry){
DPRINTF(DRAMCache, "now waiting on retry");
DPRINTF(DRAMCache, "now waiting on retry\n");
delete pkt;
}
else{
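
The guarded unblocking in recvTimingResp() above deserves a note: a queue with reserved entries reports full once its non-reserved capacity is exhausted, yet reserve-backed allocations can push occupancy past that point, so deallocating one entry does not necessarily create allocatable room. A self-contained toy queue (hypothetical names, not gem5's MSHRQueue) showing why the wasFull && !mq->isFull() recheck is needed:

#include <cassert>

// Toy stand-in for an MSHR-style queue with reserved entries. isFull()
// reports full once the non-reserved capacity is used up, but reserve-backed
// allocations may push inUse beyond that threshold.
struct ToyQueue
{
    int capacity, reserve, inUse = 0;
    ToyQueue(int cap, int res) : capacity(cap), reserve(res) {}
    bool isFull() const { return inUse >= capacity - reserve; }
    void allocate()   { ++inUse; }  // a reserve-backed allocation may exceed
                                    // the isFull() threshold
    void deallocate() { assert(inUse > 0); --inUse; }
};

int main()
{
    ToyQueue mq(4, /*reserve=*/2);
    mq.allocate();
    mq.allocate();          // non-reserved capacity exhausted: isFull() true
    mq.allocate();          // reserve-backed allocation, inUse = 3
    bool wasFull = mq.isFull();
    mq.deallocate();        // inUse back to 2, but isFull() is still true
    if (wasFull && !mq.isFull()) {
        // clearBlocked() would go here; without the recheck we would
        // unblock while the queue still has no allocatable entry.
    }
    return 0;
}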
7 changes: 6 additions & 1 deletion gem5/src/mem/dramcache_ctrl.hh
@@ -33,7 +33,7 @@

#define DRAM_PKT_COUNT 2
#define PREDICTION_LATENCY 5
#define MAPI_PREDICTOR
#undef MAPI_PREDICTOR

class DRAMCacheCtrl : public DRAMCtrl
{
@@ -157,6 +157,8 @@ class DRAMCacheCtrl : public DRAMCtrl
* @sa #BlockedCause
*/
uint8_t blocked;
// remember if we received a request while blocked because the MSHRs/write buffers were full
bool cacheMustSendRetry;

int num_cores; //num of CPU cores in the system, needed for per core predictor

@@ -343,7 +345,10 @@ class DRAMCacheCtrl : public DRAMCtrl
if (blocked == 0) {
blocked_cycles[cause] += curCycle() - blockedCycle;
port.clearBlocked();
if (cacheMustSendRetry)
port.sendRetryReq();
}

}

/**
Expand Down
