Skip to content

Commit

Permalink
Inference timing: add a backtrace to head node (JuliaLang#38123)
Browse files Browse the repository at this point in the history
This allows one to identify the caller of runtime-dispatched methods.
This can help detect inference problems and know where to fix them.

Only "head" nodes (entries into type inference) have backtraces.
We avoid doing any processing of the backtrace here, as instruction-pointer
lookup is extremely expensive and we don't even know if the information
will be used.
  • Loading branch information
timholy authored Oct 24, 2020
1 parent ae789ca commit d474c98
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 15 deletions.
37 changes: 22 additions & 15 deletions base/compiler/typeinfer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ being used for this purpose alone.
module Timings

using Core.Compiler: -, +, :, Vector, length, first, empty!, push!, pop!, @inline,
@inbounds, copy
@inbounds, copy, backtrace

# What we record for any given frame we infer during type inference.
struct InferenceFrameInfo
Expand Down Expand Up @@ -51,9 +51,10 @@ struct Timing
cur_start_time::UInt64
time::UInt64
children::Core.Array{Timing,1}
Timing(mi_info, start_time, cur_start_time, time, children) = new(mi_info, start_time, cur_start_time, time, children)
Timing(mi_info, start_time) = new(mi_info, start_time, start_time, UInt64(0), Timing[])
bt # backtrace collected upon initial entry to typeinf
end
Timing(mi_info, start_time, cur_start_time, time, children) = Timing(mi_info, start_time, cur_start_time, time, children, nothing)
Timing(mi_info, start_time) = Timing(mi_info, start_time, start_time, UInt64(0), Timing[])

_time_ns() = ccall(:jl_hrtime, UInt64, ()) # Re-implemented here because Base not yet available.

Expand Down Expand Up @@ -89,19 +90,22 @@ reset_timings()
# (This is split into a function so that it can be called both in this module, at the top
# of `enter_new_timer()`, and once at the Very End of the operation, by whoever started
# the operation and called `reset_timings()`.)
# NOTE: the @inline annotations here are not to make it faster, but to reduce the gap between
# timer manipulations and the tasks we're timing.
@inline function close_current_timer()
stop_time = _time_ns()
parent_timer = _timings[end]
accum_time = Core.Compiler.:(-)(stop_time, parent_timer.cur_start_time)
accum_time = stop_time - parent_timer.cur_start_time

# Add in accum_time ("modify" the immutable struct)
@inbounds begin
_timings[end] = Timings.Timing(
_timings[end] = Timing(
parent_timer.mi_info,
parent_timer.start_time,
parent_timer.cur_start_time,
parent_timer.time + accum_time,
parent_timer.children,
parent_timer.bt,
)
end
return nothing
Expand All @@ -115,15 +119,15 @@ end
mi_info = _typeinf_identifier(frame)

# Start the new timer right before returning
push!(_timings, Timings.Timing(mi_info, UInt64(0)))
push!(_timings, Timing(mi_info, UInt64(0)))
len = length(_timings)
new_timer = @inbounds _timings[len]
# Set the current time _after_ appending the node, to try to exclude the
# overhead from measurement.
start = Timings._time_ns()
start = _time_ns()

@inbounds begin
_timings[len] = Timings.Timing(
_timings[len] = Timing(
new_timer.mi_info,
start,
start,
Expand All @@ -140,39 +144,42 @@ end
# children (that is, asserting that inference proceeds via depth-first-search).
@inline function exit_current_timer(_expected_frame_)
# Finish the new timer
stop_time = Timings._time_ns()
stop_time = _time_ns()

expected_mi_info = _typeinf_identifier(_expected_frame_)

# Grab the new timer again because it might have been modified in _timings
# (since it's an immutable struct)
# And remove it from the current timings stack
_timings = Timings._timings
new_timer = pop!(_timings)
Core.Compiler.@assert new_timer.mi_info.mi === expected_mi_info.mi

# Prepare to unwind one level of the stack and record in the parent
parent_timer = _timings[end]

accum_time = stop_time - new_timer.cur_start_time
# Add in accum_time ("modify" the immutable struct)
new_timer = Timings.Timing(
new_timer = Timing(
new_timer.mi_info,
new_timer.start_time,
new_timer.cur_start_time,
new_timer.time + accum_time,
new_timer.children
new_timer.children,
parent_timer.mi_info.mi === ROOTmi ? backtrace() : nothing,
)
# Record the final timing with the original parent timer
parent_timer = _timings[end]
push!(parent_timer.children, new_timer)

# And finally restart the parent timer:
len = length(_timings)
@inbounds begin
_timings[len] = Timings.Timing(
_timings[len] = Timing(
parent_timer.mi_info,
parent_timer.start_time,
Timings._time_ns(),
_time_ns(),
parent_timer.time,
parent_timer.children,
parent_timer.bt,
)
end

Expand Down
2 changes: 2 additions & 0 deletions test/compiler/inference.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2882,6 +2882,8 @@ end
# The last two functions to be inferred should be `i` and `i2`, inferred at runtime with
# their concrete types.
@test sort([mi_info.mi.def.name for (time,mi_info) in flatten_times(timing1)[end-1:end]]) == [:i, :i2]
@test all(child->isa(child.bt, Vector), timing1.children)
@test all(child->child.bt===nothing, timing1.children[1].children)
# Test that inference has cached some of the Method Instances
timing2 = time_inference() do
@eval M.g(2, 3.0)
Expand Down

0 comments on commit d474c98

Please sign in to comment.