BatchNorm update bug fix. Replaced mutable=True in model.apply() …

…during training with `mutable=['batch_stats', 'get_bounds']`; otherwise, BatchNorm statistics would not be updated during training due to a recent change in the Flax BatchNorm implementation. PiperOrigin-RevId: 374920534
hjh1213 · May 20, 2021 · ae9d07f · ae9d07f
1 parent e4252b1
commit ae9d07f
Showing 1 changed file with 3 additions and 3 deletions.
diff --git a/aqt/jax/imagenet/train_utils.py b/aqt/jax/imagenet/train_utils.py
@@ -85,17 +85,17 @@ def train_step(model, state, batch, hparams, update_bounds, learning_rate_fn):
 
   def loss_fn(params):
     """loss function used for training."""
-    variables = {'params': params, **state.model_state}
+    variables = {'params': params}
+    variables.update(state.model_state)
     logits, new_model_state = model.apply(
-        variables, batch['image'], mutable=True)
+        variables, batch['image'], mutable=['batch_stats', 'get_bounds'])
     loss = cross_entropy_loss(logits, batch['label'])
     weight_penalty_params = jax.tree_leaves(variables['params'])
     weight_decay = hparams.weight_decay
     weight_l2 = sum(
         [jnp.sum(x**2) for x in weight_penalty_params if x.ndim > 1])
     weight_penalty = weight_decay * 0.5 * weight_l2
     loss = loss + weight_penalty
-    new_model_state, _ = new_model_state.pop('params')
     return loss, (new_model_state, logits)
 
   step = state.step