Skip to content

Commit e6c60ab

Browse files
Increment the epoch before running evaluation (ludwig-ai#3729)
Co-authored-by: Justin Zhao <[email protected]>
1 parent e52f346 commit e6c60ab

File tree

1 file changed

+6
-2
lines changed

1 file changed

+6
-2
lines changed

ludwig/trainers/trainer.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -1162,6 +1162,11 @@ def _train_loop(
11621162
# batch duration measurements when using timer callbacks.
11631163
self.callback(lambda c: c.on_batch_end(self, progress_tracker, save_path, sync_step=should_step))
11641164

1165+
# If this is the last batch in the epoch, increment before running evaluation so that metrics are reported
1166+
# with the correct epoch.
1167+
if batcher.last_batch():
1168+
progress_tracker.epoch += 1
1169+
11651170
if progress_tracker.steps % final_steps_per_checkpoint == 0:
11661171
if not self.skip_all_evaluation:
11671172
# Publishes metrics to MLFLow if there are any MLFlow callbacks.
@@ -1188,7 +1193,7 @@ def _train_loop(
11881193
# Checkpoint the model.
11891194
# NOTE: Ideally we would do this before evaluation, but for some reason DeepSpeed will complain
11901195
# about inflight params if we do that, which is why we checkpoint after eval instead. In practice,
1191-
# this should not make a difference, xcept in the unlikely event an error occurs during eval and we
1196+
# this should not make a difference, except in the unlikely event an error occurs during eval and we
11921197
# want to resume from the last checkpoint, in which case we will lose slightly more progress this way.
11931198
if not self.skip_save_progress:
11941199
checkpoint_manager.save(progress_tracker.steps)
@@ -1197,7 +1202,6 @@ def _train_loop(
11971202

11981203
# If this was the last batch, invoke the `on_epoch_end` callback (the epoch counter was already incremented above, before evaluation).
11991204
if batcher.last_batch():
1200-
progress_tracker.epoch += 1
12011205
self.callback(lambda c: c.on_epoch_end(self, progress_tracker, save_path))
12021206

12031207
return should_break

0 commit comments

Comments
 (0)