check for empty params in group (NVIDIA#1596)
* check for empty params in group

Signed-off-by: arendu <[email protected]>

* skip if group params are empty

Signed-off-by: arendu <[email protected]>

* skip if group params are empty

Signed-off-by: arendu <[email protected]>

* Frozen model unit test

* remove unnecessary white space

---------

Signed-off-by: arendu <[email protected]>
Co-authored-by: JimmyZhang12 <[email protected]>
Co-authored-by: Jimmy Zhang <[email protected]>
3 people authored Mar 23, 2023
1 parent 4060aef commit 0420735
Showing 2 changed files with 26 additions and 1 deletion.
6 changes: 5 additions & 1 deletion apex/optimizers/fused_adam.py
@@ -78,8 +78,10 @@ def __init__(self, params, lr=1e-3, bias_correction=True,
         self.capturable = capturable

         if capturable:
-            device = self.param_groups[0]['params'][0].device
             for idx, group in enumerate(self.param_groups):
+                if len(group['params']) == 0:
+                    continue
+                device = group['params'][0].device
                 for item in ['lr']:
                     self.param_groups[idx][item] = group[item].to(device=device)

@@ -118,6 +120,8 @@ def step(self, closure=None, grads=None, output_params=None, scale=None, grad_no
             loss = closure()

         for group in self.param_groups:
+            if len(group['params']) == 0:
+                continue
             device = group['params'][0].device
             bias_correction = 1 if group['bias_correction'] else 0
             beta1, beta2 = group['betas']
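The guard added above is exercised whenever the optimizer holds a param group whose `params` list is empty. A minimal sketch of that situation, assuming apex is installed with CUDA support; it mirrors the new unit test below rather than showing an official usage pattern:

import torch
from apex.optimizers import FusedAdam

# One real parameter plus an empty group, e.g. a pipeline stage whose
# parameters are all frozen during p-tuning.
param = torch.nn.Parameter(torch.rand(1, device='cuda'))
opt = FusedAdam([param], lr=0.01)
opt.add_param_group({"params": []})

param.grad = torch.rand_like(param)
# Before this commit, step() indexed group['params'][0] for every group and
# raised IndexError on the empty one; with the check, that group is skipped.
opt.step()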
21 changes: 21 additions & 0 deletions tests/L0/run_optimizers/test_fused_optimizer.py
@@ -189,8 +189,29 @@ def test_adam_option(self):

             self.assertLessEqual(max_abs_diff, self.max_abs_diff)
             self.assertLessEqual(max_rel_diff, self.max_rel_diff)
+
+    def test_frozen_model(self):
+        nelem = 1
+        adam_option = {'lr':0.01, 'betas':(0.6, 0.9), 'eps':3e-06,
+                       'weight_decay':0, 'amsgrad':False}
+
+        tensor = torch.rand(nelem, dtype=torch.float, device='cuda')
+        ref_param, tst_param, ref_optim, tst_optim = \
+            self.gen_param_optim([tensor], adam_option)
+
+        #Add an empty param group which may occur for pipeline parallel p-tuning
+        tst_optim.add_param_group({"params": []})
+
+        for i in range(self.iters):
+            self.gen_grad(ref_param, tst_param)
+            ref_optim.step()
+            tst_optim.step()
+            max_abs_diff, max_rel_diff = self.get_max_diff(ref_param, tst_param)
+
+            self.assertLessEqual(max_abs_diff, self.max_abs_diff)
+            self.assertLessEqual(max_rel_diff, self.max_rel_diff)


 class TestFusedAdagrad(TestFusedOptimizer):
     def __init__(self, *args, **kwargs):
         super(TestFusedAdagrad, self).__init__(*args, **kwargs)
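The comment in the test points at where such an empty group comes from in practice: under pipeline-parallel p-tuning, a stage may hold only frozen weights, so filtering for trainable parameters yields an empty list. A hypothetical illustration of how that happens (module names and the grouping scheme are made up for the example, not part of this commit):

import torch

# Hypothetical partially frozen model: backbone frozen, prompt encoder trainable.
backbone = torch.nn.Linear(8, 8)
prompt_encoder = torch.nn.Embedding(4, 8)
for p in backbone.parameters():
    p.requires_grad = False

param_groups = [
    # Empty on a stage that holds only frozen weights.
    {"params": [p for p in backbone.parameters() if p.requires_grad]},
    {"params": list(prompt_encoder.parameters()), "lr": 1e-4},
]
# FusedAdam(param_groups, ...) now skips the empty group instead of indexing into it.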
