Commit

Added config flag profile_optimizer that allows profiling the optimizer.
nouiz committed Jun 14, 2012
1 parent 4922359 commit ccf4f75
Showing 4 changed files with 249 additions and 5 deletions.
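
For context, a minimal usage sketch of the new flag (assumes a Theano build containing this commit; the toy graph below is illustrative only, not part of the change):

    import theano
    import theano.tensor as T

    theano.config.profile = True            # existing flag: runtime profile
    theano.config.profile_optimizer = True  # new flag added by this commit

    x = T.dvector('x')
    f = theano.function([x], (x ** 2).sum())
    # ProfileStats.summary() -- also printed at interpreter exit -- now ends
    # with an "Optimizer Profile" section rendered by the optimizer's
    # print_profile().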
6 changes: 4 additions & 2 deletions theano/compile/function_module.py
@@ -1049,13 +1049,15 @@ def __init__(self, inputs, outputs,
            theano.config.compute_test_value = "off"
            gof.Op.add_stack_trace_on_call = False
            start_optimizer = time.time()
-           optimizer(env)
+           optimizer_profile = optimizer(env)
            end_optimizer = time.time()

            opt_time = end_optimizer - start_optimizer
            mode.optimizer_time += opt_time

            if profile:
                profile.optimizer_time += opt_time
+               if theano.config.profile_optimizer:
+                   profile.optimizer_profile = (optimizer, optimizer_profile)
            _logger.debug('Optimizing took %f seconds', opt_time)

            #Add deep copy to respect the memory interface

16 changes: 16 additions & 0 deletions theano/compile/profiling.py
@@ -64,6 +64,15 @@ def _atexit_print_fn():
            for key, val in getattr(ps, attr).iteritems():
                assert key not in cum_attr
                cum_attr[key] = val
+
+       if cum.optimizer_profile and ps.optimizer_profile:
+           merge = cum.optimizer_profile[0].merge_profile(
+               cum.optimizer_profile[1],
+               ps.optimizer_profile[1])
+           cum.optimizer_profile = (cum.optimizer_profile[0], merge)
+       else:
+           cum.optimizer_profile = None
+
    cum.summary(file=_atexit_print_file)


@@ -133,6 +142,9 @@ class ProfileStats(object):

    line_width = 140

+   optimizer_profile = None
+   # None or tuple (the optimizer, the profile it returned)
+
    # param is called flag_time_thunks because most other attributes with time
    # in the name are times *of* something, rather than configuration flags.
    def __init__(self, atexit_print=True, flag_time_thunks=None, **kwargs):
@@ -419,6 +431,10 @@ def summary(self, file=sys.stderr, n_ops_to_print=20,
        elif self.fct_callcount > 0:
            print >> file, (" No node time accumulated "
                            "(hint: try config profiling.time_thunks=1)")
+       if self.optimizer_profile:
+           print >> file, "Optimizer Profile"
+           print >> file, "-----------------"
+           self.optimizer_profile[0].print_profile(file, self.optimizer_profile[1])


if 0: # old code still to be ported from ProfileMode

229 changes: 226 additions & 3 deletions theano/gof/opt.py
@@ -75,7 +75,7 @@ def optimize(self, env, *args, **kwargs):
          opt.apply(env)
        """
        self.add_requirements(env)
-       self.apply(env, *args, **kwargs)
+       return self.apply(env, *args, **kwargs)

    def __call__(self, env):
        """WRITEME
@@ -98,6 +98,10 @@ def print_summary(self, stream=sys.stdout, level=0, depth=-1):
        print >> stream, "%s%s %s id=%i" % (
            (' ' * level), self.__class__.__name__, name, id(self))

+   def print_profile(self, prof):
+       if prof is not None:
+           raise NotImplementedError(
+               "The function print_profile must be overridden if the"
+               " optimizer returns profiling information.")


class FromFunctionOptimizer(Optimizer):
"""WRITEME"""
@@ -157,11 +161,13 @@ def apply(self, env):
        if env.profile:
            validate_before = env.profile.validate_time
        nb_node_before = len(env.nodes)
+       sub_profs = []
        for optimizer in self:
            try:
                t0 = time.time()
-               optimizer.optimize(env)
+               sub_prof = optimizer.optimize(env)
                l.append(float(time.time() - t0))
+               sub_profs.append(sub_prof)
            except AssertionError:
                # do not catch Assertion failures
                raise
@@ -171,6 +177,7 @@ def apply(self, env):
                continue
            else:
                raise
+
        if config.time_seq_optimizer:
            print "SeqOptimizer",
            if hasattr(self,"name"): print self.name,
@@ -194,6 +201,12 @@ def cmp(a,b):
            for (t, opt) in lll[::-1]:
                print ' %.6fs - %s' % (t, opt)
            print
+       if env.profile:
+           validate_time = env.profile.validate_time - validate_before
+       else:
+           validate_time = None
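+       # The tuple below is the SeqOptimizer profile; it is consumed as
+       # prof[0]..prof[5] by print_profile and merge_profile further down.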
+       return (self, l, validate_time, nb_node_before,
+               len(env.nodes), sub_profs)

    def __eq__(self, other):
        #added to override the list's __eq__ implementation
@@ -219,6 +232,115 @@ def print_summary(self, stream=sys.stdout, level=0, depth=-1):
        for opt in self:
            opt.print_summary(stream, level=(level + 2), depth=depth)

+   @staticmethod
+   def print_profile(stream, prof, level=0):
+       (opts, prof, validate_time, nb_node_before,
+        nb_node_after, sub_profs) = prof
+       blanc = (' ' * level)
+
+       print >> stream, blanc, "SeqOptimizer",
+       if hasattr(opts, "name"):
+           print >> stream, blanc, opts.name,
+       elif hasattr(opts, "__name__"):
+           print >> stream, blanc, opts.__name__,
+       print >> stream, (" time %.3fs for %d/%d nodes"
+                         " before/after optimization" % (
+                             sum(prof), nb_node_before, nb_node_after))
+       print >> stream, blanc, " %.3fs for env.validate()" % (validate_time)
+       if level == 0:
+           print >> stream, blanc, " time - (name, class, index)"
+       ll = []
+       for opt in opts:
+           if hasattr(opt, "__name__"):
+               ll.append((opt.__name__, opt.__class__.__name__,
+                          opts.index(opt)))
+           else:
+               ll.append((opt.name, opt.__class__.__name__,
+                          opts.index(opt)))
+       lll = zip(prof, ll)
+
+       def cmp(a, b):
+           if a[0] == b[0]:
+               return 0
+           elif a[0] < b[0]:
+               return -1
+           return 1
+       lll.sort(cmp)
+
+       for (t, opt) in lll[::-1]:
+           #if t < 1:
+           #    continue
+           print >> stream, blanc, ' %.6fs - %s' % (t, opt)
+           if sub_profs[opt[-1]]:
+               opts[opt[-1]].print_profile(stream, sub_profs[opt[-1]],
+                                           level=level + 1)
+       print >> stream

+   @staticmethod
+   def merge_profile(prof1, prof2):
+       """
+       Merge two profiles returned by this class's apply() method.
+       """
+       new_t = []
+       new_l = []
+       new_sub_profile = []
+       # Merge optimizations common to both profiles (same object).
+       for l in set(prof1[0]).intersection(set(prof2[0])):
+           idx1 = prof1[0].index(l)
+           idx2 = prof2[0].index(l)
+           new_t.append(prof1[1][idx1] +
+                        prof2[1][idx2])
+           new_l.append(l)
+           if hasattr(l, 'merge_profile'):
+               assert len(prof1[5][idx1]) == len(prof2[5][idx2])
+               new_sub_profile.append(l.merge_profile(prof1[5][idx1],
+                                                      prof2[5][idx2]))
+           else:
+               new_sub_profile.append(None)
+
+       # Merge optimizations that appear in only one profile.
+       import StringIO
+       for l in set(prof1[0]).symmetric_difference(set(prof2[0])):
+           # The set trick above only works when the two profiles contain
+           # the same optimization objects; it doesn't work for equivalent
+           # optimizations, so we try to merge those here.
+           new_l_names = [o.name for o in new_l]
+           if l.name in new_l_names:
+               idx = new_l_names.index(l.name)
+               io1 = StringIO.StringIO()
+               io2 = StringIO.StringIO()
+               l.print_summary(io1)
+               new_l[idx].print_summary(io2)
+               if io1.getvalue() == io2.getvalue():
+                   if l in prof1[0]:
+                       p = prof1
+                   else:
+                       p = prof2
+                   new_t[idx] += p[1][p[0].index(l)]
+                   if hasattr(l, 'merge_profile'):
+                       assert (len(p[5][p[0].index(l)]) ==
+                               len(new_sub_profile[idx]))
+                       new_sub_profile[idx] = l.merge_profile(
+                           new_sub_profile[idx], p[5][p[0].index(l)])
+                   else:
+                       new_sub_profile[idx] = None
+                   continue
+           if l in prof1[0]:
+               p = prof1
+           else:
+               p = prof2
+           new_t.append(p[1][p[0].index(l)])
+           idx = p[0].index(l)
+           new_l.append(l)
+           new_sub_profile.append(p[5][idx])

+       new_opt = SeqOptimizer(*new_l)
+       assert set(prof1[0]).issubset(set(new_l))
+       # assert set(prof2[0]).issubset(set(new_l))
+       assert len(new_t) == len(new_opt) == len(new_sub_profile)
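+       # The -1 values stand in for the before/after node counts, which
+       # are not tracked across merged profiles.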
+       return (new_opt, new_t, prof1[2] + prof2[2],
+               -1, -1, new_sub_profile)


class _metadict:
"""WRITEME"""
@@ -1319,7 +1441,12 @@ def apply(self, env, start_from=None):

        loop_timing = []
        global_opt_timing = []
+       time_lopts = {}
+       io_toposort_timing = []
+       nb_nodes = []
+       for lopt in self.local_optimizers:
+           process_count.setdefault(lopt, 0)
+           time_lopts.setdefault(lopt, 0)

        while changed and not max_use_abort:
            t0 = time.time()
@@ -1338,7 +1465,9 @@ def apply(self, env, start_from=None):
            for node in start_from:
                assert node in env.outputs

+           topo_t0 = time.time()
            q = deque(graph.io_toposort(env.inputs, start_from))
+           io_toposort_timing.append(time.time() - topo_t0)

            nb_nodes.append(len(q))
            max_nb_nodes = max(max_nb_nodes, len(q))
@@ -1360,9 +1489,11 @@ def pruner(node):
            while q:
                node = q.pop()
                current_node = node
+
                for lopt in self.local_optimizers:
-                   process_count.setdefault(lopt, 0)
+                   t_lopt = time.time()
                    lopt_change = self.process_node(env, node, lopt)
+                   time_lopts[lopt] += time.time() - t_lopt
                    if lopt_change:
                        process_count[lopt] += 1
                        changed = True
@@ -1407,6 +1538,9 @@ def pruner(node):
                print ' %d - %s' % (count, opt)
            print

+       return (self, loop_timing, process_count, max_nb_nodes,
+               global_opt_timing, nb_nodes, time_lopts, io_toposort_timing)
+
    def print_summary(self, stream=sys.stdout, level=0, depth=-1):
        name = getattr(self, 'name', None)
        print >> stream, "%s%s %s id=%i" % (
@@ -1416,6 +1550,95 @@ def print_summary(self, stream=sys.stdout, level=0, depth=-1):
            lopt.print_summary(stream, level=(level + 2),
                               depth=(depth - 1))

+   @staticmethod
+   def print_profile(stream, prof, level=0):
+       (opt, loop_timing, process_count, max_nb_nodes,
+        global_opt_timing, nb_nodes, time_lopts, io_toposort_timing) = prof
+       blanc = (' ' * level)
+       print >> stream, blanc, "EquilibriumOptimizer",
+       print >> stream, blanc, getattr(opt, "name",
+                                       getattr(opt, "__name__", ""))
+       print >> stream, blanc, " time %.3fs for %d passes, %d nodes max" % (
+           sum(loop_timing), len(loop_timing), max_nb_nodes)
+       print >> stream, blanc, " time io_toposort %.3fs" % sum(
+           io_toposort_timing)
+       for i in range(len(loop_timing)):
+           print >> stream, blanc, ('%d - %.3fs (%.3fs in global opts, '
+                                    '%.3fs io_toposort) - %d nodes' % (
+                                        i, loop_timing[i],
+                                        global_opt_timing[i],
+                                        io_toposort_timing[i], nb_nodes[i]))
+
+       count_opt = []
+       for opt, count in process_count.iteritems():
+           if count > 0:
+               count_opt.append((time_lopts[opt], count, opt))
+
+       if count_opt:
+           print >> stream, blanc, \
+               'times applied - optimizer (only those applied):'
+           count_opt.sort()
+           for (t, count, opt) in count_opt[::-1]:
+               print >> stream, blanc, ' %.3fs - %d - %s' % (
+                   t, count, opt)
+           print >> stream

+   @staticmethod
+   def merge_profile(prof1, prof2):
+       # (opt, loop_timing, process_count, max_nb_nodes,
+       #  global_opt_timing, nb_nodes, time_lopts, io_toposort_timing) = prof1
+       local_optimizers = set(prof1[0].local_optimizers).union(
+           prof2[0].local_optimizers)
+       global_optimizers = set(prof1[0].global_optimizers).union(
+           prof2[0].global_optimizers)
+       new_opt = EquilibriumOptimizer(
+           local_optimizers.union(global_optimizers),
+           max_use_ratio=1)
+
+       def merge_list(l1, l2):
+           l = copy.copy(l1)
+           for idx, nb in enumerate(l2):
+               if idx < len(l):
+                   l[idx] += nb
+               else:
+                   l.append(nb)
+           return l
+
+       loop_timing = merge_list(prof1[1], prof2[1])
+
+       process_count = prof1[2].copy()
+       for process, count in prof2[2].iteritems():
+           if process in process_count:
+               process_count[process] += count
+           else:
+               process_count[process] = count
+
+       max_nb_nodes = max(prof1[3], prof2[3])
+
+       global_opt_timing = merge_list(prof1[4], prof2[4])
+
+       nb_nodes = merge_list(prof1[5], prof2[5])
+
+       time_lopts = prof1[6].copy()
+       for opt, t in prof2[6].iteritems():
+           if opt in time_lopts:
+               time_lopts[opt] += t
+           else:
+               time_lopts[opt] = t
+
+       io_toposort_timing = merge_list(prof1[7], prof2[7])
+
+       assert (len(loop_timing) == len(global_opt_timing) ==
+               len(io_toposort_timing) == len(nb_nodes))
+       assert len(loop_timing) == max(len(prof1[1]), len(prof2[1]))
+       return (new_opt,
+               loop_timing,
+               process_count,
+               max_nb_nodes,
+               global_opt_timing,
+               nb_nodes,
+               time_lopts,
+               io_toposort_timing)

#################
### Utilities ###
3 changes: 3 additions & 0 deletions theano/gof/vm.py
@@ -17,6 +17,9 @@
AddConfigVar('profile',
        "If VM should collect profile information",
        BoolParam(False))
+AddConfigVar('profile_optimizer',
+       "If VM should collect optimizer profile information",
+       BoolParam(False))

raise_with_op = link.raise_with_op
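
Like other Theano config flags, these can also be set from the environment instead of in code; a hypothetical invocation (my_script.py stands in for any Theano program):

    THEANO_FLAGS='profile=True,profile_optimizer=True' python my_script.py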
