Skip to content

Commit

Permalink
[FEATURE] opRoofline in pytorch namespace
Browse files Browse the repository at this point in the history
Apply roofline analysis at the op level.

We aggregate the counter information of all kernels belonging to an op
into a single data point for that op.

Signed-off-by: YushuoEdge <[email protected]>
  • Loading branch information
YushuoEdge committed Apr 27, 2022
1 parent 356d5e0 commit 547cf32
Show file tree
Hide file tree
Showing 19 changed files with 148 additions and 18 deletions.
68 changes: 64 additions & 4 deletions src/amanda/profiler/metrics.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from itertools import count
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
from utils import findTopK

def drawRoofline(hardwareTFlops, hardwareIntensity, X, Y):
def drawRoofline(hardwareTFlops, hardwareIntensity, X, Y, op=False):
maxright=0
for x in X:
if x > maxright:
Expand Down Expand Up @@ -47,7 +48,10 @@ def func2(x):
for i in range(len(X)):
ax.scatter(X[i], Y[i], color="black", alpha=1, zorder=2, s=10)

plt.savefig("./Experiments/kernelRoofline_result.png")
if op == True:
plt.savefig("./Experiments/opRoofline_result.png")
else:
plt.savefig("./Experiments/kernelRoofline_result.png")



Expand Down Expand Up @@ -175,9 +179,12 @@ def kernelRoofline(supplyInfo, countData):
spOp = spAdd + spMul + spFma * 2
cyclesElapsed = dataList[index+5].gpuValue

# n*1e-6M / 1350MHz s
time = cyclesElapsed / (supplyInfo[0] * 1024 * 1024)
tflops = (spOp / time) / (1024 * 1024 * 1024)
# (n / ts) * 1e-12
tflops = (spOp / time) / (1024 * 1024 * 1024 * 1024)
kernelY.append(round(tflops, 2))
# n / mB
intensity = spOp / (dramRead + dramWrite)
kernelX.append(intensity)

Expand Down Expand Up @@ -314,4 +321,57 @@ def opInfoCounter(dataList, flopCount=True):
print(resCounter)
resCounter.to_csv("./Experiments/opInfoCounter_result.csv", index=False, sep=',')

return infoList
return infoList

# Op Roofline Analysis:
# supplyInfo: [Frequency(MHz), Peak-Performance(TFlops), Throughput(GB/s)]
def opRoofline(supplyInfo, countData):
    """Draw an op-level roofline plot from per-kernel counter records.

    All kernels recorded between two consecutive "NEW OP" markers are summed
    into a single (intensity, TFLOPS) point for that op, and the points are
    handed to drawRoofline together with the hardware limits.

    Args:
        supplyInfo: [Frequency(MHz), Peak-Performance(TFlops), Throughput(GB/s)].
        countData: flat sequence of records exposing ``rangeName`` and
            ``gpuValue``. A record whose ``rangeName`` is "NEW OP" starts a
            new op; it is followed by 6 records per kernel, read in the order
            dram read, dram write, sp add, sp fma, sp mul, cycles elapsed.
            Records that precede the first marker are ignored.

    Returns:
        None. The plot is produced by drawRoofline.
    """
    # Number of counter records emitted for every kernel.
    numValue = 6

    # Hardware TFLOPS & intensity (ridge point of the roofline).
    hardwareTFlops = supplyInfo[1]
    hardwareIntensity = (hardwareTFlops * 1000) / supplyInfo[2]

    # Locate the marker record that opens each op.
    opPosition = [
        index for index, record in enumerate(countData)
        if record.rangeName == "NEW OP"
    ]
    # Each op ends where the next one starts; the last op ends with the data.
    # Building the end list this way also copes with "no marker at all",
    # where indexing opPosition[-1] would raise IndexError.
    opEnd = opPosition[1:] + [len(countData)]

    opX = []
    opY = []
    for start, end in zip(opPosition, opEnd):
        # Floor division: a trailing, incomplete group of records is not a
        # kernel. True division would let a partial segment slip past the
        # zero check below and end in a division by zero.
        kernelNum = (end - start - 1) // numValue
        if kernelNum == 0:
            continue

        totalDramRead = 0
        totalDramWrite = 0
        totalCyclesElapsed = 0
        totalFlopCount = 0
        for j in range(kernelNum):
            kernelPosition = start + 1 + j * numValue
            totalDramRead += countData[kernelPosition].gpuValue
            totalDramWrite += countData[kernelPosition + 1].gpuValue
            spAdd = countData[kernelPosition + 2].gpuValue
            spFma = countData[kernelPosition + 3].gpuValue
            spMul = countData[kernelPosition + 4].gpuValue
            totalCyclesElapsed += countData[kernelPosition + 5].gpuValue
            # A fused multiply-add counts as two floating point operations.
            totalFlopCount += spAdd + spMul + spFma * 2

        totalDram = totalDramRead + totalDramWrite
        # An op without elapsed cycles or DRAM traffic has no finite
        # position on the plot; skip it instead of dividing by zero.
        if totalCyclesElapsed == 0 or totalDram == 0:
            continue

        # cycles / frequency -> seconds.
        # NOTE(review): frequency is scaled with 1024 * 1024 and FLOPS with
        # 1024^4 while hardwareIntensity uses 1000; kept as-is so op points
        # stay comparable with the kernel roofline - confirm intended units.
        time = totalCyclesElapsed / (supplyInfo[0] * 1024 * 1024)
        tflops = (totalFlopCount / time) / (1024 * 1024 * 1024 * 1024)
        opY.append(round(tflops, 2))
        # Arithmetic intensity: floating point operations per unit of DRAM traffic.
        intensity = totalFlopCount / totalDram
        opX.append(intensity)

    # op=True selects the op-specific output file; without it the plot would
    # be written to the kernel roofline result file.
    drawRoofline(hardwareTFlops, hardwareIntensity, opX, opY, op=True)
3 changes: 2 additions & 1 deletion src/amanda/profiler/pytorch/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from amanda_tracer import *
from amanda_counter import *
from amanda_counter import *
from profiler import *
2 changes: 1 addition & 1 deletion src/amanda/profiler/pytorch/amanda_counter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import amanda

sys.path.append('../')
sys.path.append('../..')
from counter import counter

class amandaCounter(amanda.Tool):
Expand Down
2 changes: 1 addition & 1 deletion src/amanda/profiler/pytorch/amanda_tracer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import amanda

sys.path.append('../')
sys.path.append('../..')
from tracer import tracer

class amandaTracer(amanda.Tool):
Expand Down
8 changes: 7 additions & 1 deletion src/amanda/profiler/pytorch/profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from amanda_counter import amandaCounter

from utils import setConfigsMetric
from metrics import kernelRoofline
from metrics import kernelRoofline, opRoofline
from torchMetrics import kernelInfo, opInfo

class Profiler():
Expand Down Expand Up @@ -69,6 +69,12 @@ def showResults(self):
opInfo(self.opList, self.startTimeList, self.endTimeList, self.traceDataApi, self.traceDataRt, self.countData)
return

if self.__metric == "OpRoofline":
self.countData = self.counter.getCountData()
assert len(self.supplyInfo) == 3, "Please provide correct hardware parameters"
opRoofline(self.supplyInfo, self.countData)
return

sys.exit("Profiler.Metric: " + self.__metric + " not supported")


Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from asyncore import file_dispatcher
import amanda
import torch
import sys
import torchvision

sys.path.append("..")
from amanda_counter import amandaCounter

def main():
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import sys

import amanda
import torch
import torchvision

sys.path.append("..")
from profiler import Profiler

def main():
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
import sys

import amanda
import torch
import torchvision
from pytorch.profiler import Profiler

sys.path.append("..")
from profiler import Profiler

def main():

device = "cuda"

model = torchvision.models.resnet50().to(device)
x = torch.rand((32, 3, 227, 227)).to(device)
x = torch.rand((64, 3, 227, 227)).to(device)

metric = "KernelRoofline"
# Nvidia Geforce RTX 2080 Ti: 1350MHz, 13.45 Single-Precision TFlops, 616GB/s
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import sys

import amanda
import torch
import torchvision

sys.path.append("..")
from profiler import Profiler

def main():
Expand Down
29 changes: 29 additions & 0 deletions src/amanda/profiler/pytorch/test/opRoofline_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import sys

import amanda
import torch
import torchvision

sys.path.append("..")
from profiler import Profiler

def main():
    """Profile one ResNet-50 forward pass and show the op roofline result."""
    targetDevice = "cuda"

    network = torchvision.models.resnet50().to(targetDevice)
    inputBatch = torch.rand((32, 3, 227, 227)).to(targetDevice)

    # Nvidia Geforce RTX 2080 Ti: 1350MHz, 13.45 Single-Precision TFlops, 616GB/s
    hardwareSpec = [1350, 13.45, 616]
    metricName = "OpRoofline"

    opProfiler = Profiler(metricName)
    opProfiler.setConfigs(metric=metricName, supplyInfo=hardwareSpec)

    # Run the forward pass with the profiler's counter tool applied.
    with amanda.tool.apply(opProfiler.counter):
        output = network(inputBatch)

    opProfiler.showResults()


if __name__ == "__main__":
    main()
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from sklearn.preprocessing import KernelCenterer
import amanda
import torch
import sys
import torchvision

sys.path.append("..")
from amanda_tracer import amandaTracer

def main():
Expand Down
4 changes: 3 additions & 1 deletion src/amanda/profiler/tensorflow/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
from amanda_tracer import *
from amanda_tracer import *
from amanda_counter import *
from profiler import *
2 changes: 1 addition & 1 deletion src/amanda/profiler/tensorflow/amanda_counter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import tensorflow as tf

import amanda
sys.path.append('../')
sys.path.append('../..')
from counter import counter


Expand Down
2 changes: 1 addition & 1 deletion src/amanda/profiler/tensorflow/amanda_tracer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import tensorflow as tf

import amanda
sys.path.append('../')
sys.path.append('../..')
from tracer import tracer


Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import os
import sys
import amanda
import tensorflow as tf
from tensorflow.python.client import timeline
from examples.common.tensorflow.model.resnet_50 import ResNet50

sys.path.append("..")
from amanda_counter import amandaCounter
from examples.common.tensorflow.model.resnet_50 import ResNet50

def main():

Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import sys

import amanda
import tensorflow as tf
from examples.common.tensorflow.model.resnet_50 import ResNet50

sys.path.append("..")
from profiler import Profiler
from examples.common.tensorflow.model.resnet_50 import ResNet50

def main():

Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import sys

import amanda
import tensorflow as tf
from examples.common.tensorflow.model.resnet_50 import ResNet50

sys.path.append("..")
from profiler import Profiler
from examples.common.tensorflow.model.resnet_50 import ResNet50

def main():

Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import sys

from torch import int32
import amanda
import tensorflow as tf
from tensorflow.python.client import timeline
from examples.common.tensorflow.model.resnet_50 import ResNet50

sys.path.append("..")
from amanda_tracer import amandaTracer
from examples.common.tensorflow.model.resnet_50 import ResNet50

def main():

Expand Down
6 changes: 6 additions & 0 deletions src/amanda/profiler/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,5 +67,11 @@ def setConfigsMetric(metric, tracer, counter, flopCount=True):
counter.setKindFlag(0x5 | 0x42 << 42)
counter.setFilePath("./Experiments/kernel_metrics.txt")
return

# set configs for op roofline analysis
if metric == "OpRoofline" and flopCount:
counter.setKindFlag(0x1C05 | 0x1 << 42)
counter.setFilePath("./Experiments/kernel_metrics.txt")
return

sys.exit("Invalid Metric")

0 comments on commit 547cf32

Please sign in to comment.