Skip to content

Commit

Permalink
Merge: [DLRM/TF2] Fix multiepoch bug
Browse files: browse the repository at this point in the history
  • Loading branch information
nv-kkudrynski committed Apr 14, 2022
2 parents 9753968 + 227f975 commit 9f3616f
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 0 deletions.
1 change: 1 addition & 0 deletions TensorFlow2/Recommendation/DLRM/datasets.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -174,6 +174,7 @@ def op(self):
# Only one gpu is set to be visible
pipeline = pipeline.apply(tf.data.experimental.prefetch_to_device(f'/gpu:0'))
pipeline = pipeline.unbatch()
pipeline = pipeline.repeat()
return pipeline

@tf.function
Expand Down
1 change: 1 addition & 0 deletions TensorFlow2/Recommendation/DLRM/main.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -329,6 +329,7 @@ def main(argv):
best_auc = 0
train_begin = time.time()
for epoch in range(FLAGS.epochs):
print('Starting epoch: ', epoch)
for step in range(len(train_pipeline)):
if step == FLAGS.profiler_start_step and hvd.rank() == FLAGS.profiled_rank:
tf.profiler.experimental.start('logdir')
Expand Down

0 comments on commit 9f3616f

Please sign in to comment.