Skip to content

Commit

Permalink
Suggest in the Profiler's Overview Page that Kernel Launch time may be due to CPU contention with tf.data.
Browse files Browse the repository at this point in the history

PiperOrigin-RevId: 307713646
Change-Id: I4cf6fcc5986858f7c20ddd5f96803c367885851c
  • Loading branch information
tensorflower-gardener committed Apr 22, 2020
1 parent 838b19e commit 1ac4f9e
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,10 @@ constexpr int kHostAnalysisSectionNumber = 3;
// Explanation shown when a large share of the step time is classified as
// "All Others". Note the leading " % ..." — the caller presumably prepends
// the sampled percentage value (same pattern as the StrCat usage below).
const char* kAllOthersPythonExplanation =
" % of the total step time sampled is spent on 'All Others' time. "
"This could be due to Python execution overhead.";
// Suffix appended to the "Kernel Launch" bottleneck statement when tf.data is
// in use (see KernelLaunchAnalysis): long kernel-launch time could be caused
// by CPU contention with the tf.data input pipeline, and the message suggests
// setting TF_GPU_THREAD_MODE=gpu_private as a possible mitigation.
const char* kKernelLaunchTfDataContention =
" It could be due to CPU contention with tf.data. In this case, you may "
"try to set the environment variable TF_GPU_THREAD_MODE=gpu_private.";

template <class Collection>
double GetTimeInMs(const Collection& type_ps, EventType event_type) {
Expand Down Expand Up @@ -357,7 +361,7 @@ double RatioOfHostToDeviceTimeToStepTime(
return 0.0;
}

void KernelLaunchAnalysis(double kernel_launch_percent,
void KernelLaunchAnalysis(bool tfdata_used, double kernel_launch_percent,
string* kernel_launch_classification,
string* kernel_launch_statement) {
string percent_str = absl::StrFormat("%.1lf", kernel_launch_percent);
Expand All @@ -366,12 +370,18 @@ void KernelLaunchAnalysis(double kernel_launch_percent,
*kernel_launch_statement = absl::StrCat(
percent_str,
" % of the total step time sampled is spent on 'Kernel Launch'.");
if (tfdata_used) {
absl::StrAppend(kernel_launch_statement, kKernelLaunchTfDataContention);
}
} else if (kernel_launch_percent >=
kModeratelyKernelLaunchBoundThresholdInPercent) {
*kernel_launch_classification = "moderate";
*kernel_launch_statement = absl::StrCat(
percent_str,
" % of the total step time sampled is spent on 'Kernel Launch'.");
if (tfdata_used) {
absl::StrAppend(kernel_launch_statement, kKernelLaunchTfDataContention);
}
} else {
*kernel_launch_classification = "no";
*kernel_launch_statement = "";
Expand Down Expand Up @@ -566,8 +576,8 @@ InputPipelineAnalysisResult ConvertOpStatsToInputPipelineAnalysis(
GenerateHostResult(op_stats.host_op_metrics_db(), &result);

InputPipelineAnalysisRecommendation recommendation = GenerateRecommendation();
BottleneckAnalysis bottleneck_analysis =
ComputeBottleneckAnalysis(result.step_details());
BottleneckAnalysis bottleneck_analysis = ComputeBottleneckAnalysis(
result.input_time_breakdown(), result.step_details());
recommendation.mutable_bottleneck_analysis()->PackFrom(bottleneck_analysis);
*recommendation.mutable_summary_next_step() =
GetSummaryNextStep(bottleneck_analysis.input_classification(),
Expand Down Expand Up @@ -646,6 +656,7 @@ void OutputAnalysis(double output_percent, string* output_classification,
}

BottleneckAnalysis ComputeBottleneckAnalysis(
const InputTimeBreakdown& input_time_breakdown,
const ::tensorflow::protobuf::RepeatedPtrField<::google::protobuf::Any>&
any_step_details) {
double total_step_time_ms = 0;
Expand Down Expand Up @@ -700,8 +711,8 @@ BottleneckAnalysis ComputeBottleneckAnalysis(

string kernel_launch_classification;
string kernel_launch_statement;
KernelLaunchAnalysis(kernel_launch_percent, &kernel_launch_classification,
&kernel_launch_statement);
KernelLaunchAnalysis(TfDataInUse(input_time_breakdown), kernel_launch_percent,
&kernel_launch_classification, &kernel_launch_statement);

string all_other_classification;
string all_other_statement;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ InputPipelineAnalysisRecommendation GenerateRecommendation();

// Returns the performance bottleneck of the program executed.
BottleneckAnalysis ComputeBottleneckAnalysis(
const InputTimeBreakdown& input_time_breakdown,
const ::tensorflow::protobuf::RepeatedPtrField<::google::protobuf::Any>&
any_step_details);

Expand Down
5 changes: 3 additions & 2 deletions tensorflow/core/profiler/convert/op_stats_to_overview_page.cc
Original file line number Diff line number Diff line change
Expand Up @@ -253,8 +253,9 @@ OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats,
*overview_page.mutable_analysis() = ComputeAnalysisResult(op_stats);
*overview_page.mutable_input_analysis() =
ConvertOpStatsToInputPipelineAnalysis(op_stats, hardware_type);
BottleneckAnalysis bottleneck =
ComputeBottleneckAnalysis(overview_page.input_analysis().step_details());
BottleneckAnalysis bottleneck = ComputeBottleneckAnalysis(
overview_page.input_analysis().input_time_breakdown(),
overview_page.input_analysis().step_details());
*overview_page.mutable_recommendation() = ComputeGenericRecommendation(
bottleneck, op_stats.device_op_metrics_db().precision_stats());
SetCommonRecommendation(bottleneck.input_classification(),
Expand Down

0 comments on commit 1ac4f9e

Please sign in to comment.